From 818472a77173511ed7d252a0c06f9cdd1ad0a843 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Tue, 6 May 2025 18:04:48 -0600
Subject: [PATCH 01/65] Add configuration class for executions with a validated
 constructor.

---
 configuration.py | 403 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 403 insertions(+)
 create mode 100644 configuration.py

diff --git a/configuration.py b/configuration.py
new file mode 100644
index 0000000..ee3ad2e
--- /dev/null
+++ b/configuration.py
@@ -0,0 +1,403 @@
+from typing import List
+import os
+
+from enum import Enum
+
+
+class AlsMethod(str, Enum):
+    CONSTANT_INPUTS = "inconst"
+    CONSTANT_OUTPUTS = "outconst"
+    PROBRUN = "probrun"
+    SIGNIFICANCE = "significance"
+    CCARVING = "ccarving"
+    DECISION_TREE = "decision_tree"
+
+
+# List of iterative methods.
+_ITERATIVE_METHODS = [
+    AlsMethod.CONSTANT_INPUTS,
+    AlsMethod.CONSTANT_OUTPUTS,
+    AlsMethod.PROBRUN,
+    AlsMethod.SIGNIFICANCE,
+    AlsMethod.CCARVING,
+]
+
+# List of methods that aren't iterative but can be when doing resynthesis.
+_ITERATIVE_METHODS_WITH_RESYNTHESIS = [AlsMethod.DECISION_TREE]
+
+
+class ApproxSynthesisConfig:
+    """
+    Configuration class for running Approximate Logic Synthesis with different methods.
+
+    Parameters
+    ----------
+    method : AlsMethod | str
+        One of the supported methods. Can use the AlsMethod enum or one of the
+        following string names: 'inconst', 'outconst', 'probrun',
+        'significance', 'ccarving', or 'decision_tree'.
+
+    circuit_file : str | List[str]
+        Path(s) to the Verilog circuit file(s).
+
+    dataset_file : str | List[str]
+        Path(s) to the dataset file(s).
+
+        If multiple circuits were specified with `circuit_file`, this can be
+        given as a list which must match the `circuit_file` list length.
+        Each dataset will be used with each corresponding circuit.
+
+    generate_dataset : str | int | float | List[str | int | float], optional
+        If specified, the dataset file(s) given with dataset_file will be
+        generated on the spot. Overwriting any existing file(s).
+
+        Rule(s) to generate dataset. Can be:
+        - 'exhaustive' (only for circuits with number of inputs <= 32)
+        - an integer number of inputs
+        - a float percentage (0 < x <= 1)
+
+        If multiple datasets were specified with `dataset_file`, this option can
+        be given as a list which must match the `dataset_file` list length.
+        Each dataset will be generated with the corresponding settings.
+
+    dataset_fraction : int | float | List[int | float], optional
+        Subset of dataset to use. Cannot be used with `generate_dataset`.
+
+        Can be given as:
+        - an integer number of inputs (lower than dataset size)
+        - a float percentage (0 < x <= 1)
+
+        If multiple datasets were specified with `dataset_file`, this option
+        can be given as a list which must match the `dataset_file` list length.
+        Each dataset will be generated with the corresponding settings.
+
+    resynthesis : bool, default=False
+        Whether to use resynthesis.
+
+    error_threshold : float (0 < x <= 1)
+        The maximum error threshold permitted. Required for iterative methods,
+        like pruning methods or ML methods with resynthesis.
+
+    max_iters : int, optional
+        Maximum amount of iterations to execute. Used in iterative methods,
+        like pruning methods or ML methods with resynthesis.
+
+    save_file : str, optional
+        Path to file where configuration and progress of the run is saved.
+
+    continue_from_save : bool, default=False
+        Whether to continue from a saved file. If True, `save_file` option must
+        be provided. If the file doesn't exist, a new run will be started.
+
+    max_depth : int | List[int]
+        Required for 'decision_tree'. Can be a single int or list if multiple
+        circuits are specified. In which case the list must match the
+        `circuit_file` list length. Each circuit will use the corresponding max
+        depth for its decision tree.
+
+    one_tree_per_output : bool, default=False
+        Used only by 'decision_tree' method.
+        If True, uses a separate tree per output.
+        If False, uses a single multi-output tree.
+
+    show_tb_progress : bool, default=False
+        Whether to show simulation progress.
+    """
+
+    def __init__(
+        self,
+        method: str,
+        circuit_file: str | List[str],
+        dataset_file: str | List[str],
+        generate_dataset: str | int | float | List[str | int | float] | None = None,
+        dataset_fraction: int | float | List[int | float] | None = None,
+        resynthesis: bool = False,
+        error_threshold: float | None = None,
+        max_iters: int | None = None,
+        save_file: str | None = None,
+        continue_from_save: bool = False,
+        max_depth: int | List[int] | None = None,
+        one_tree_per_output: bool = False,
+        show_tb_progress: bool = False,
+    ):
+        self.method = method
+        self.circuit_file = circuit_file
+        self.dataset_file = dataset_file
+        self.generate_dataset = generate_dataset
+        self.dataset_fraction = dataset_fraction
+        self.resynthesis = resynthesis
+        self.error_threshold = error_threshold
+        self.max_iters = max_iters
+        self.save_file = save_file
+        self.continue_from_save = continue_from_save
+        self.max_depth = max_depth
+        self.one_tree_per_output = one_tree_per_output
+        self.show_progress = show_tb_progress
+        validate_config(self)
+
+
+def validate_config(config: ApproxSynthesisConfig):
+    """
+    Validates the given ApproxSynthesisConfig. For more details on how each
+    parameter is checked, refer to the ApproxSynthesisConfig docs or to each
+    `_validate...` function's docs.
+
+    Raises
+    ------
+    ValueError
+        If required parameters are missing or invalid.
+    """
+
+    _validate_method(config)
+    _validate_circuit_files(config)
+    _validate_dataset_files(config)
+    _validate_dataset_vs_generate_exclusivity(config)
+    _validate_generate_dataset(config)
+    _validate_dataset_fraction(config)
+    _validate_error_threshold(config)
+    _validate_max_iters(config)
+    _validate_max_depth(config)
+    _validate_continue_from_save(config)
+
+
+def _validate_method(config: ApproxSynthesisConfig):
+    """
+    Validates the synthesis method.
+
+    If `method` is a string, tries to convert it to an AlsMethod enum.
+    Raises a ValueError if the method name is invalid.
+
+    Ensures consistency for downstream logic by enforcing enum usage.
+    """
+    if isinstance(config.method, str):
+        try:
+            config.method = AlsMethod(config.method)
+        except ValueError:
+            available_methods = ", ".join([method.value for method in AlsMethod])
+            raise ValueError(
+                f"{config.method} is not a valid {AlsMethod.__name__}. Available methods are: {available_methods}"
+            )
+
+
+def _validate_circuit_files(config: ApproxSynthesisConfig):
+    """
+    Validates the existence of circuit files.
+
+    Checks if each file path in `circuit_file` exists. If not, raises a ValueError.
+    Ensures input Verilog files are valid for further processing.
+    """
+    circuits = config.circuit_file
+    circuits = circuits if isinstance(circuits, list) else [circuits]
+    for circuit in circuits:
+        if not os.path.isfile(circuit):
+            raise ValueError(f"Circuit file does not exist: {circuit}")
+
+
+def _validate_dataset_files(config: ApproxSynthesisConfig):
+    """
+    Validates dataset files.
+
+    Ensures each dataset file exists, unless 'generate_dataset' is set.
+
+    Uses `_ensure_length_match` to validate correspondence with circuit files.
+    Raises a ValueError if any required dataset file is missing.
+    """
+    dataset_files = _ensure_length_match(
+        config.dataset_file, config.circuit_file, "dataset_file", "circuit_file"
+    )
+    if config.generate_dataset is None:
+        for f in dataset_files:
+            if not os.path.isfile(f):
+                raise ValueError(
+                    f"Dataset file not found: {f}. Use 'generate_dataset' or provide a valid file."
+                )
+
+
+def _validate_dataset_vs_generate_exclusivity(config: ApproxSynthesisConfig):
+    """
+    Ensures mutual exclusivity between 'generate_dataset' and 'dataset_fraction'.
+
+    Both fields are incompatible; only one may be provided.
+    Raises a ValueError if both are given.
+    """
+    if config.generate_dataset is not None and config.dataset_fraction is not None:
+        raise ValueError(
+            "Cannot specify both 'generate_dataset' and 'dataset_fraction'."
+        )
+
+
+def _validate_generate_dataset(config: ApproxSynthesisConfig):
+    """
+    Validates 'generate_dataset' parameters.
+
+    Ensures that each generation rule is valid:
+    - str: must be 'exhaustive'
+    - int: must be > 0
+    - float: must be in (0, 1]
+
+    Uses `_ensure_length_match` to align with dataset files.
+    Raises ValueError for invalid values.
+    """
+    if config.generate_dataset is not None:
+        gens = _ensure_length_match(
+            config.generate_dataset,
+            config.dataset_file,
+            "generate_dataset",
+            "dataset_file",
+        )
+        for g in gens:
+            if isinstance(g, str):
+                if g != "exhaustive":
+                    raise ValueError(f"Invalid generate_dataset string: {g}")
+                # TODO: Check number of inputs in circuit (must be <= 32)
+            elif isinstance(g, int):
+                if g <= 0:
+                    raise ValueError("Integer generate_dataset must be > 0")
+                # TODO: Check it's <= max input count of the circuit
+            elif isinstance(g, float):
+                if not (0 < g <= 1):
+                    raise ValueError(
+                        "Percentage generate_dataset must be between 0 and 1."
+                    )
+            else:
+                raise ValueError(f"Invalid generate_dataset value: {g}")
+
+
+def _validate_dataset_fraction(config: ApproxSynthesisConfig):
+    """
+    Validates 'dataset_fraction'.
+
+    Ensures values are valid:
+    - int: must be > 0
+    - float: must be in (0, 1]
+
+    Uses `_ensure_length_match` to align with dataset files.
+    Raises ValueError for out-of-range values.
+    """
+    if config.dataset_fraction is not None:
+        fracs = _ensure_length_match(
+            config.dataset_fraction,
+            config.dataset_file,
+            "dataset_fraction",
+            "dataset_file",
+        )
+        for f in fracs:
+            if isinstance(f, int) and f <= 0:
+                raise ValueError("dataset_fraction as int must be > 0")
+            elif isinstance(f, float) and not (0 < f <= 1):
+                raise ValueError(
+                    "dataset_fraction as percentage must be between 0 and 1"
+                )
+            # TODO: Check dataset size to ensure integer fraction < full dataset
+
+
+def _validate_error_threshold(config: ApproxSynthesisConfig):
+    """
+    Validates 'error_threshold'.
+
+    Required for:
+    - all iterative methods
+    - methods that become iterative with resynthesis, like decision_tree
+
+    Raises ValueError if missing in those cases.
+    """
+    if config.method in _ITERATIVE_METHODS:
+        if config.error_threshold is None:
+            raise ValueError(
+                f"'error_threshold' is required for method {config.method}"
+            )
+    elif (
+        config.method in _ITERATIVE_METHODS_WITH_RESYNTHESIS
+        and config.resynthesis
+        and config.error_threshold is None
+    ):
+        raise ValueError(
+            f"'error_threshold' is required for method {config.method} with resynthesis"
+        )
+
+
+def _validate_max_iters(config: ApproxSynthesisConfig):
+    """
+    Validates 'max_iters'.
+
+    Required only for methods that become iterative under resynthesis,
+    like decision_tree; because they might never reach the error threshold.
+    Raises ValueError if missing in that case.
+    """
+    if (
+        config.method in _ITERATIVE_METHODS_WITH_RESYNTHESIS
+        and config.resynthesis
+        and config.max_iters is None
+    ):
+        raise ValueError(
+            f"'max_iters' is required for {config.method} with resynthesis"
+        )
+
+
+def _validate_max_depth(config: ApproxSynthesisConfig):
+    """
+    Validates 'max_depth' for decision trees.
+
+    Ensures it is provided for the 'decision_tree' method and aligns with
+    the number of circuit files if given as a list.
+    Raises ValueError if missing or mismatched.
+    """
+    if config.method == AlsMethod.DECISION_TREE:
+        if config.max_depth is None:
+            raise ValueError(f"'max_depth' is required for method f{config.method}.")
+        else:
+            _ensure_length_match(
+                config.max_depth, config.circuit_file, "max_depth", "circuit_file"
+            )
+
+
+def _validate_continue_from_save(config: ApproxSynthesisConfig):
+    """
+    Validates parameters for continuing from a saved file.
+
+    Ensures 'save_file' is provided and exists on disk when
+    'continue_from_save' is True. Raises ValueError otherwise.
+    """
+    if config.continue_from_save:
+        if config.save_file is None:
+            raise ValueError("To continue from save, 'save_file' must be provided.")
+        if not os.path.isfile(config.save_file):
+            raise ValueError("To continue from save, 'save_file' must exist.")
+
+
+def _ensure_length_match(values, ref_values, field_name, ref_name):
+    """
+    Ensures list parameters match reference list length.
+
+    Converts scalar to list and compares length if both are lists.
+    Raises ValueError if mismatched or list is given with scalar reference.
+
+    Parameters
+    ----------
+    values : Any
+        Parameter to validate (scalar or list).
+    ref_values : Any
+        Reference parameter (scalar or list).
+    field_name : str
+        Name of the field being validated (for error messages).
+    ref_name : str
+        Name of the reference field (for error messages).
+
+    Returns
+    -------
+    List[Any]
+        The validated values, always as a list.
+    """
+    if isinstance(values, list):
+        if isinstance(ref_values, list):
+            if len(values) != len(ref_values):
+                raise ValueError(
+                    f"'{field_name}' length ({len(values)}) must match {ref_name} count ({len(ref_values)})"
+                )
+        else:
+            raise ValueError(
+                f"'{field_name}' can't be given as a list if {ref_name} isn't a list."
+            )
+        return values
+    else:
+        return [values]

From 07ac36793e30ba766a24a5d0142acf29ca916862 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Wed, 7 May 2025 14:29:25 -0600
Subject: [PATCH 02/65] Improve validation of struct so that the fields end up
 in the most convenient format to use later

---
 configuration.py | 235 ++++++++++++++++++++++++++++-------------------
 1 file changed, 143 insertions(+), 92 deletions(-)

diff --git a/configuration.py b/configuration.py
index ee3ad2e..b2a9a39 100644
--- a/configuration.py
+++ b/configuration.py
@@ -32,7 +32,7 @@ class ApproxSynthesisConfig:
 
     Parameters
     ----------
-    method : AlsMethod | str
+    method : AlsMethod
         One of the supported methods. Can use the AlsMethod enum or one of the
         following string names: 'inconst', 'outconst', 'probrun',
         'significance', 'ccarving', or 'decision_tree'.
@@ -104,9 +104,26 @@ class ApproxSynthesisConfig:
         Whether to show simulation progress.
     """
 
+    # After instantiation/validation the method is always turned into an
+    # AlsMethod and anything that can be a list is turned into a list. For ease
+    # of use of this struct in the runner code.
+    method: AlsMethod
+    circuit_file: List[str]
+    dataset_file: List[str]
+    generate_dataset: List[str | int | float] | None
+    dataset_fraction: List[int | float] | None
+    resynthesis: bool
+    error_threshold: float | None
+    max_iters: int | None
+    save_file: str | None
+    continue_from_save: bool
+    max_depth: List[int] | None
+    one_tree_per_output: bool
+    show_tb_progress: bool
+
     def __init__(
         self,
-        method: str,
+        method: AlsMethod | str,
         circuit_file: str | List[str],
         dataset_file: str | List[str],
         generate_dataset: str | int | float | List[str | int | float] | None = None,
@@ -120,47 +137,48 @@ def __init__(
         one_tree_per_output: bool = False,
         show_tb_progress: bool = False,
     ):
-        self.method = method
-        self.circuit_file = circuit_file
-        self.dataset_file = dataset_file
-        self.generate_dataset = generate_dataset
-        self.dataset_fraction = dataset_fraction
+        """
+        Instantiate and validate an ApproxSynthesisConfig.
+
+        Raises
+        ------
+        ValueError
+            If required parameters are missing or invalid.
+        """
+        self.method = _validate_method(method)
+        self.circuit_file = _validate_circuit_files(circuit_file)
+        self.dataset_file = _validate_dataset_files(
+            dataset_file, self.circuit_file, generate_dataset
+        )
+
+        _validate_dataset_fraction_vs_generate_exclusivity(
+            generate_dataset, dataset_fraction
+        )
+
+        self.generate_dataset = _validate_generate_dataset(
+            generate_dataset, self.dataset_file
+        )
+        self.dataset_fraction = _validate_dataset_fraction(
+            dataset_fraction, self.dataset_file
+        )
+
         self.resynthesis = resynthesis
-        self.error_threshold = error_threshold
-        self.max_iters = max_iters
-        self.save_file = save_file
+        self.error_threshold = _validate_error_threshold(
+            error_threshold, self.method, self.resynthesis
+        )
+        self.max_iters = _validate_max_iters(max_iters, self.method, self.resynthesis)
+        self.max_depth = _validate_max_depth(max_depth, self.method, self.circuit_file)
+        self.save_file = _validate_save_file(save_file, continue_from_save)
         self.continue_from_save = continue_from_save
-        self.max_depth = max_depth
         self.one_tree_per_output = one_tree_per_output
         self.show_progress = show_tb_progress
-        validate_config(self)
 
+    def __repr__(self):
+        fields = ", ".join(f"{key}={value!r}" for key, value in self.__dict__.items())
+        return f"{self.__class__.__name__}({fields})"
 
-def validate_config(config: ApproxSynthesisConfig):
-    """
-    Validates the given ApproxSynthesisConfig. For more details on how each
-    parameter is checked, refer to the ApproxSynthesisConfig docs or to each
-    `_validate...` function's docs.
-
-    Raises
-    ------
-    ValueError
-        If required parameters are missing or invalid.
-    """
 
-    _validate_method(config)
-    _validate_circuit_files(config)
-    _validate_dataset_files(config)
-    _validate_dataset_vs_generate_exclusivity(config)
-    _validate_generate_dataset(config)
-    _validate_dataset_fraction(config)
-    _validate_error_threshold(config)
-    _validate_max_iters(config)
-    _validate_max_depth(config)
-    _validate_continue_from_save(config)
-
-
-def _validate_method(config: ApproxSynthesisConfig):
+def _validate_method(method: AlsMethod | str) -> AlsMethod:
     """
     Validates the synthesis method.
 
@@ -169,31 +187,38 @@ def _validate_method(config: ApproxSynthesisConfig):
 
     Ensures consistency for downstream logic by enforcing enum usage.
     """
-    if isinstance(config.method, str):
+    if isinstance(method, str):
         try:
-            config.method = AlsMethod(config.method)
+            method = AlsMethod(method)
         except ValueError:
             available_methods = ", ".join([method.value for method in AlsMethod])
             raise ValueError(
-                f"{config.method} is not a valid {AlsMethod.__name__}. Available methods are: {available_methods}"
+                f"{method} is not a valid {AlsMethod.__name__}. Available methods are: {available_methods}"
             )
 
+    return method
+
 
-def _validate_circuit_files(config: ApproxSynthesisConfig):
+def _validate_circuit_files(circuits: str | List[str]) -> List[str]:
     """
     Validates the existence of circuit files.
 
     Checks if each file path in `circuit_file` exists. If not, raises a ValueError.
     Ensures input Verilog files are valid for further processing.
     """
-    circuits = config.circuit_file
     circuits = circuits if isinstance(circuits, list) else [circuits]
     for circuit in circuits:
         if not os.path.isfile(circuit):
             raise ValueError(f"Circuit file does not exist: {circuit}")
 
+    return circuits
 
-def _validate_dataset_files(config: ApproxSynthesisConfig):
+
+def _validate_dataset_files(
+    dataset_files: str | List[str],
+    circuit_files: List[str],
+    generate_dataset: str | int | float | List[str | int | float] | None,
+) -> List[str]:
     """
     Validates dataset files.
 
@@ -203,30 +228,37 @@ def _validate_dataset_files(config: ApproxSynthesisConfig):
     Raises a ValueError if any required dataset file is missing.
     """
     dataset_files = _ensure_length_match(
-        config.dataset_file, config.circuit_file, "dataset_file", "circuit_file"
+        dataset_files, circuit_files, "dataset_file", "circuit_file"
     )
-    if config.generate_dataset is None:
+    if generate_dataset is None:
         for f in dataset_files:
             if not os.path.isfile(f):
                 raise ValueError(
                     f"Dataset file not found: {f}. Use 'generate_dataset' or provide a valid file."
                 )
+    return dataset_files
 
 
-def _validate_dataset_vs_generate_exclusivity(config: ApproxSynthesisConfig):
+def _validate_dataset_fraction_vs_generate_exclusivity(
+    generate_dataset: str | int | float | List[str | int | float] | None,
+    dataset_fraction: int | float | List[int | float] | None,
+):
     """
     Ensures mutual exclusivity between 'generate_dataset' and 'dataset_fraction'.
 
     Both fields are incompatible; only one may be provided.
     Raises a ValueError if both are given.
     """
-    if config.generate_dataset is not None and config.dataset_fraction is not None:
+    if generate_dataset is not None and dataset_fraction is not None:
         raise ValueError(
             "Cannot specify both 'generate_dataset' and 'dataset_fraction'."
         )
 
 
-def _validate_generate_dataset(config: ApproxSynthesisConfig):
+def _validate_generate_dataset(
+    generate_dataset: str | int | float | List[str | int | float] | None,
+    dataset_files: List[str],
+) -> List[str | int | float] | None:
     """
     Validates 'generate_dataset' parameters.
 
@@ -238,10 +270,10 @@ def _validate_generate_dataset(config: ApproxSynthesisConfig):
     Uses `_ensure_length_match` to align with dataset files.
     Raises ValueError for invalid values.
     """
-    if config.generate_dataset is not None:
+    if generate_dataset is not None:
         gens = _ensure_length_match(
-            config.generate_dataset,
-            config.dataset_file,
+            generate_dataset,
+            dataset_files,
             "generate_dataset",
             "dataset_file",
         )
@@ -261,9 +293,13 @@ def _validate_generate_dataset(config: ApproxSynthesisConfig):
                     )
             else:
                 raise ValueError(f"Invalid generate_dataset value: {g}")
+        return gens
 
 
-def _validate_dataset_fraction(config: ApproxSynthesisConfig):
+def _validate_dataset_fraction(
+    dataset_fraction: int | float | List[int | float] | None,
+    dataset_files: List[str],
+) -> List[int | float] | None:
     """
     Validates 'dataset_fraction'.
 
@@ -274,10 +310,10 @@ def _validate_dataset_fraction(config: ApproxSynthesisConfig):
     Uses `_ensure_length_match` to align with dataset files.
     Raises ValueError for out-of-range values.
     """
-    if config.dataset_fraction is not None:
+    if dataset_fraction is not None:
         fracs = _ensure_length_match(
-            config.dataset_fraction,
-            config.dataset_file,
+            dataset_fraction,
+            dataset_files,
             "dataset_fraction",
             "dataset_file",
         )
@@ -289,9 +325,14 @@ def _validate_dataset_fraction(config: ApproxSynthesisConfig):
                     "dataset_fraction as percentage must be between 0 and 1"
                 )
             # TODO: Check dataset size to ensure integer fraction < full dataset
+        return fracs
 
 
-def _validate_error_threshold(config: ApproxSynthesisConfig):
+def _validate_error_threshold(
+    error_threshold: float | None,
+    method: AlsMethod,
+    resynthesis: bool,
+) -> float | None:
     """
     Validates 'error_threshold'.
 
@@ -301,22 +342,24 @@ def _validate_error_threshold(config: ApproxSynthesisConfig):
 
     Raises ValueError if missing in those cases.
     """
-    if config.method in _ITERATIVE_METHODS:
-        if config.error_threshold is None:
-            raise ValueError(
-                f"'error_threshold' is required for method {config.method}"
-            )
+    if method in _ITERATIVE_METHODS:
+        if error_threshold is None:
+            raise ValueError(f"'error_threshold' is required for method {method}")
     elif (
-        config.method in _ITERATIVE_METHODS_WITH_RESYNTHESIS
-        and config.resynthesis
-        and config.error_threshold is None
+        method in _ITERATIVE_METHODS_WITH_RESYNTHESIS
+        and resynthesis
+        and error_threshold is None
     ):
         raise ValueError(
-            f"'error_threshold' is required for method {config.method} with resynthesis"
+            f"'error_threshold' is required for method {method} with resynthesis"
         )
 
+    return error_threshold
+
 
-def _validate_max_iters(config: ApproxSynthesisConfig):
+def _validate_max_iters(
+    max_iters: int | None, method: AlsMethod, resynthesis: bool
+) -> int | None:
     """
     Validates 'max_iters'.
 
@@ -325,16 +368,16 @@ def _validate_max_iters(config: ApproxSynthesisConfig):
     Raises ValueError if missing in that case.
     """
     if (
-        config.method in _ITERATIVE_METHODS_WITH_RESYNTHESIS
-        and config.resynthesis
-        and config.max_iters is None
+        method in _ITERATIVE_METHODS_WITH_RESYNTHESIS
+        and resynthesis
+        and max_iters is None
     ):
-        raise ValueError(
-            f"'max_iters' is required for {config.method} with resynthesis"
-        )
+        raise ValueError(f"'max_iters' is required for {method} with resynthesis")
 
 
-def _validate_max_depth(config: ApproxSynthesisConfig):
+def _validate_max_depth(
+    max_depth: int | List[int] | None, method: AlsMethod, circuit_files: List[str]
+):
     """
     Validates 'max_depth' for decision trees.
 
@@ -342,36 +385,48 @@ def _validate_max_depth(config: ApproxSynthesisConfig):
     the number of circuit files if given as a list.
     Raises ValueError if missing or mismatched.
     """
-    if config.method == AlsMethod.DECISION_TREE:
-        if config.max_depth is None:
-            raise ValueError(f"'max_depth' is required for method f{config.method}.")
+    if method == AlsMethod.DECISION_TREE:
+        if max_depth is None:
+            raise ValueError(f"'max_depth' is required for method f{method}.")
         else:
-            _ensure_length_match(
-                config.max_depth, config.circuit_file, "max_depth", "circuit_file"
-            )
+            _ensure_length_match(max_depth, circuit_files, "max_depth", "circuit_file")
 
 
-def _validate_continue_from_save(config: ApproxSynthesisConfig):
+def _validate_save_file(save_file: str | None, continue_from_save: bool) -> str | None:
     """
     Validates parameters for continuing from a saved file.
 
     Ensures 'save_file' is provided and exists on disk when
     'continue_from_save' is True. Raises ValueError otherwise.
     """
-    if config.continue_from_save:
-        if config.save_file is None:
+    if continue_from_save:
+        if save_file is None:
             raise ValueError("To continue from save, 'save_file' must be provided.")
-        if not os.path.isfile(config.save_file):
-            raise ValueError("To continue from save, 'save_file' must exist.")
+        if not os.path.isfile(save_file):
+            raise ValueError(
+                f"To continue from save, 'save_file' must exist: {save_file}"
+            )
 
+    return save_file
 
-def _ensure_length_match(values, ref_values, field_name, ref_name):
+
+def _ensure_length_match[T: str | int | float](
+    values: T | List[T],
+    ref_values: List[str],
+    field_name: str,
+    ref_name: str,
+) -> List[T]:
     """
     Ensures list parameters match reference list length.
 
     Converts scalar to list and compares length if both are lists.
     Raises ValueError if mismatched or list is given with scalar reference.
 
+    When converting scalar to list, the list returned contains N copies of the
+    scalar value, where N is the length of the reference list.
+    This is done so that the config object ends up with only lists that all are
+    of equal length, for easy handling in the runner code.
+
     Parameters
     ----------
     values : Any
@@ -389,15 +444,11 @@ def _ensure_length_match(values, ref_values, field_name, ref_name):
         The validated values, always as a list.
     """
     if isinstance(values, list):
-        if isinstance(ref_values, list):
-            if len(values) != len(ref_values):
-                raise ValueError(
-                    f"'{field_name}' length ({len(values)}) must match {ref_name} count ({len(ref_values)})"
-                )
-        else:
+        if len(values) != len(ref_values):
             raise ValueError(
-                f"'{field_name}' can't be given as a list if {ref_name} isn't a list."
+                f"'{field_name}' length ({len(values)}) must match {ref_name} count ({len(ref_values)})"
             )
         return values
     else:
-        return [values]
+        value: T = values
+        return [value for _ in ref_values]

From b244f36f10b4d91a51239efe9c62530edc778e63 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Wed, 7 May 2025 15:38:46 -0600
Subject: [PATCH 03/65] Add flag to write_tb to disable progress prints

---
 circuit.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/circuit.py b/circuit.py
index 7e7ab2c..a3fc842 100644
--- a/circuit.py
+++ b/circuit.py
@@ -650,7 +650,7 @@ def generate_dataset(self, filename, samples, distribution='uniform', **kwargs):
 
         return
 
-    def write_tb(self, filename, dataset_file, iterations=None, timescale= '10ns / 1ps', delay=10, format='h', dump_vcd=False):
+    def write_tb(self, filename, dataset_file, iterations=None, timescale= '10ns / 1ps', delay=10, format='h', dump_vcd=False, show_progress=True):
         '''
         Writes a basic testbench for the circuit.
 
@@ -675,6 +675,8 @@ def write_tb(self, filename, dataset_file, iterations=None, timescale= '10ns / 1
                 'o' for octal
                 'd' for decimal
                 'b' for binary
+        show_progress: bool, default = True
+            Whether the testbench should print its progress as it executes.
 
         Returns
         -------
@@ -751,7 +753,11 @@ def write_tb(self, filename, dataset_file, iterations=None, timescale= '10ns / 1
               f'\n' \
 
         '''Initial statement'''
-        text= f'{text}initial begin\n $display("-- Beginning Simulation --");\n\n'
+        text= f'{text}initial begin\n'
+
+        if show_progress:
+            text += '$display("-- Beginning Simulation --");\n\n'
+
         if dump_vcd:
             text=f'{text} $dumpfile("./{self.topmodule}.vcd");\n' \
                  f' $dumpvars(0,{self.topmodule}_tb);\n'
@@ -778,9 +784,12 @@ def write_tb(self, filename, dataset_file, iterations=None, timescale= '10ns / 1
         text=f'{text}",'
         for o in list(outputs_info.keys())[::-1][0:-1]:
             text= f'{text}{o},'
-        text= f'{text}{list(outputs_info.keys())[0]});\n'\
-            + f'  $display("-- Progress: %d/{iterations} --",i+1);\n'\
-              f' end\n' \
+        text= f'{text}{list(outputs_info.keys())[0]});\n'
+
+        if show_progress:
+            text +=f'  $display("-- Progress: %d/{iterations} --",i+1);\n'
+
+        text = f'{text}end\n' \
               f' $fclose(file);\n' \
               f' $fclose(mem);\n' \
               f' $finish;\n' \

From cd6af035c282cc3c6e7e71eeb025f2e97ef4b265 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Wed, 7 May 2025 15:39:20 -0600
Subject: [PATCH 04/65] Add CLI parsing for the configuration parameters. Also
 rename parameters to be shorter for CLI flags.

---
 __main__.py      | 215 +++++++++++++++++++++++++++++----------
 configuration.py | 259 +++++++++++++++++++++++------------------------
 2 files changed, 288 insertions(+), 186 deletions(-)

diff --git a/__main__.py b/__main__.py
index a7b891a..df2cb36 100644
--- a/__main__.py
+++ b/__main__.py
@@ -1,56 +1,161 @@
+import argparse
+import os
+from typing import List
+from configuration import ApproxSynthesisConfig, AlsMethod
 
-from os import path
-import sys
-
-
-CONFIG_FILE = "poisonoak/poisonoak.config"
-HELP_FILE = "poisonoak/poisonoak.help"
-
-
-def trim(text):
-    '''
-    Remove the spaces between text
-
-    Parameters
-    ----------
-    text : string
-        text with white spaces
-
-    Returns
-    -------
-    string
-        text without white spaces
-    '''
-    return text.replace(" ", "").replace("\n","")
-
-
-def read_config():
-    '''
-    Read the config file to check everything is there
-
-    Returns
-    -------
-    boolean
-        True if all the variables are defined and correct
-    '''
-    if (path.exists(CONFIG_FILE)):
-        with open(CONFIG_FILE) as config:
-            for line in config.readlines():
-                if len(line) > 10:
-                    var, value = trim(line).split("=")
-                    if var == "RTL" and path.exists(value):
-                        print (value, "CHECK")
-                    else:
-                        print(f"Option {var} is incorrect")
-                        return False
-        return True
-    else:
-        print("Config File does not exists!")
-
-
-for arg in sys.argv:
-    if (arg == "-h" or arg == "--help"):
-        with open(HELP_FILE) as help:
-            print(help.read())
-    else:
-        read_config()
+
+def parse_generate(value):
+    if value == "exhaustive":
+        return value
+
+    try:
+        return int(value)
+    except ValueError:
+        try:
+            return float(value)
+        except ValueError:
+            raise argparse.ArgumentTypeError(
+                f"Invalid generate_dataset value: {value}. Must be int, float or 'exhaustive'."
+            )
+
+
+def parse_subset(value):
+    try:
+        return int(value)
+    except ValueError:
+        try:
+            return float(value)
+        except ValueError:
+            raise argparse.ArgumentTypeError(
+                f"Invalid generate_dataset value: {value}. Must be int or float."
+            )
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="AxLS CLI. Can execute the different Approximate Logic Synthesis methods available with some configuration options."
+    )
+    parser.add_argument(
+        "--method",
+        required=True,
+        type=str,
+        choices=[m.value for m in AlsMethod],
+        help="Approximation method.",
+    )
+    parser.add_argument(
+        "--circuit", required=True, nargs="+", help="Verilog circuit file(s)."
+    )
+    parser.add_argument("--dataset", required=True, nargs="+", help="Dataset file(s).")
+    parser.add_argument(
+        "--generate",
+        nargs="+",
+        type=parse_generate,
+        help="If provided, the dataset file(s) will be generated instead of looking for existing ones. This will overwrite any existing files. Accepts int (the amount of samples), float (between 0-1, a percentage of all the possible inputs) or 'exhaustive' (generate all possible inputs for a circuit, only usable for circuits with less than 32 input bits).",
+    )
+    parser.add_argument(
+        "--subset",
+        nargs="+",
+        type=parse_subset,
+        help="If provided, will only use a subset of the existing dataset file(s). Accepts int (the amount of samples) or float (0 < x <= 1, a percentage of the available samples).",
+    )
+    parser.add_argument(
+        "--resynthesis", action="store_true", help="If provided will use resynthesis."
+    )
+    parser.add_argument(
+        "--error",
+        type=float,
+        help="Maximum error threshold to stop iterations. (0 < x <= 1).",
+    )
+    parser.add_argument(
+        "--max-iters",
+        type=int,
+        help="Maximum number of iterations for iterative methods.",
+    )
+    parser.add_argument(
+        "--save",
+        type=str,
+        help="Path to a file to save the run's configuration and progress.",
+    )
+    parser.add_argument(
+        "--resume",
+        action="store_true",
+        help="If provided, will resume from the existing save file.",
+    )
+    parser.add_argument(
+        "--max-depth", nargs="+", type=int, help="Max depth for decision_tree method"
+    )
+    parser.add_argument(
+        "--one-tree-per-output",
+        action="store_true",
+        help="Use one tree per output for decision_tree",
+    )
+    parser.add_argument(
+        "--show-progress", action="store_true", help="Show simulation progress"
+    )
+    args = parser.parse_args()
+
+    # number of circuits
+    n = len(args.circuit)
+
+    def check_list[T](name: str, lst: List[T]) -> T | List[T]:
+        ln = len(lst)
+        if ln == 1:
+            return lst[0]
+        elif ln == n:
+            return lst
+        else:
+            if n == 1:
+                parser.error(
+                    f"--{name} only accepts 1 value when only 1 circuit is given, got {ln} values"
+                )
+            else:
+                parser.error(f"--{name} requires 1 or {n} values, got {ln}")
+
+    # align lists
+    args.circuit = check_list("circuit", args.circuit)
+    args.dataset = check_list("dataset", args.dataset)
+
+    if args.generate:
+        args.generate = check_list("generate", args.generate)
+    if args.subset:
+        args.subset = check_list("subset", args.subset)
+
+    if args.max_depth:
+        args.max_depth = check_list("max-depth", args.max_depth)
+
+    # exclusivity checks
+    if args.generate and args.subset:
+        parser.error("Cannot specify both --generate and --subset")
+
+    if args.resume:
+        if not args.save:
+            parser.error("--save is required when --resume is set")
+        if not os.path.isfile(args.save):
+            parser.error(f"Save file does not exist: {args.save}")
+
+    # instantiate config
+    try:
+        config = ApproxSynthesisConfig(
+            method=args.method,
+            circuit=args.circuit,
+            dataset=args.dataset,
+            generate=args.generate,
+            subset=args.subset,
+            resynthesis=args.resynthesis,
+            error=args.error,
+            max_iters=args.max_iters,
+            save=args.save,
+            resume=args.resume,
+            max_depth=args.max_depth,
+            one_tree_per_output=args.one_tree_per_output,
+            show_progress=args.show_progress,
+        )
+    except ValueError as e:
+        parser.error(str(e))
+
+    print("Configuration loaded successfully")
+    print(config)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/configuration.py b/configuration.py
index b2a9a39..beefd27 100644
--- a/configuration.py
+++ b/configuration.py
@@ -12,6 +12,12 @@ class AlsMethod(str, Enum):
     CCARVING = "ccarving"
     DECISION_TREE = "decision_tree"
 
+    def __repr__(self):
+        return self.value
+
+    def __str__(self):
+        return self.value
+
 
 # List of iterative methods.
 _ITERATIVE_METHODS = [
@@ -37,18 +43,18 @@ class ApproxSynthesisConfig:
         following string names: 'inconst', 'outconst', 'probrun',
         'significance', 'ccarving', or 'decision_tree'.
 
-    circuit_file : str | List[str]
+    circuit : str | List[str]
         Path(s) to the Verilog circuit file(s).
 
-    dataset_file : str | List[str]
+    dataset : str | List[str]
         Path(s) to the dataset file(s).
 
-        If multiple circuits were specified with `circuit_file`, this can be
-        given as a list which must match the `circuit_file` list length.
+        If multiple circuits were specified with `circuit`, this can be
+        given as a list which must match the `circuit` list length.
         Each dataset will be used with each corresponding circuit.
 
-    generate_dataset : str | int | float | List[str | int | float], optional
-        If specified, the dataset file(s) given with dataset_file will be
+    generate : str | int | float | List[str | int | float], optional
+        If specified, the dataset file(s) given with dataset will be
         generated on the spot. Overwriting any existing file(s).
 
         Rule(s) to generate dataset. Can be:
@@ -56,25 +62,25 @@ class ApproxSynthesisConfig:
         - an integer number of inputs
         - a float percentage (0 < x <= 1)
 
-        If multiple datasets were specified with `dataset_file`, this option can
-        be given as a list which must match the `dataset_file` list length.
+        If multiple datasets were specified with `dataset`, this option can
+        be given as a list which must match the `dataset` list length.
         Each dataset will be generated with the corresponding settings.
 
-    dataset_fraction : int | float | List[int | float], optional
-        Subset of dataset to use. Cannot be used with `generate_dataset`.
+    subset : int | float | List[int | float], optional
+        Subset of dataset to use. Cannot be used with `generate`.
 
         Can be given as:
         - an integer number of inputs (lower than dataset size)
         - a float percentage (0 < x <= 1)
 
-        If multiple datasets were specified with `dataset_file`, this option
-        can be given as a list which must match the `dataset_file` list length.
+        If multiple datasets were specified with `dataset`, this option
+        can be given as a list which must match the `dataset` list length.
         Each dataset will be generated with the corresponding settings.
 
     resynthesis : bool, default=False
         Whether to use resynthesis.
 
-    error_threshold : float (0 < x <= 1)
+    error : float (0 < x <= 1)
         The maximum error threshold permitted. Required for iterative methods,
         like pruning methods or ML methods with resynthesis.
 
@@ -82,17 +88,17 @@ class ApproxSynthesisConfig:
         Maximum amount of iterations to execute. Used in iterative methods,
         like pruning methods or ML methods with resynthesis.
 
-    save_file : str, optional
+    save : str, optional
         Path to file where configuration and progress of the run is saved.
 
-    continue_from_save : bool, default=False
-        Whether to continue from a saved file. If True, `save_file` option must
+    resume : bool, default=False
+        Whether to continue from a saved file. If True, `save` option must
         be provided. If the file doesn't exist, a new run will be started.
 
     max_depth : int | List[int]
         Required for 'decision_tree'. Can be a single int or list if multiple
         circuits are specified. In which case the list must match the
-        `circuit_file` list length. Each circuit will use the corresponding max
+        `circuit` list length. Each circuit will use the corresponding max
         depth for its decision tree.
 
     one_tree_per_output : bool, default=False
@@ -100,7 +106,7 @@ class ApproxSynthesisConfig:
         If True, uses a separate tree per output.
         If False, uses a single multi-output tree.
 
-    show_tb_progress : bool, default=False
+    show_progress : bool, default=False
         Whether to show simulation progress.
     """
 
@@ -108,34 +114,34 @@ class ApproxSynthesisConfig:
     # AlsMethod and anything that can be a list is turned into a list. For ease
     # of use of this struct in the runner code.
     method: AlsMethod
-    circuit_file: List[str]
-    dataset_file: List[str]
-    generate_dataset: List[str | int | float] | None
-    dataset_fraction: List[int | float] | None
+    circuit: List[str]
+    dataset: List[str]
+    generate: List[str | int | float] | None
+    subset: List[int | float] | None
     resynthesis: bool
-    error_threshold: float | None
+    error: float | None
     max_iters: int | None
-    save_file: str | None
-    continue_from_save: bool
+    save: str | None
+    resume: bool
     max_depth: List[int] | None
     one_tree_per_output: bool
-    show_tb_progress: bool
+    show_progress: bool
 
     def __init__(
         self,
         method: AlsMethod | str,
-        circuit_file: str | List[str],
-        dataset_file: str | List[str],
-        generate_dataset: str | int | float | List[str | int | float] | None = None,
-        dataset_fraction: int | float | List[int | float] | None = None,
+        circuit: str | List[str],
+        dataset: str | List[str],
+        generate: str | int | float | List[str | int | float] | None = None,
+        subset: int | float | List[int | float] | None = None,
         resynthesis: bool = False,
-        error_threshold: float | None = None,
+        error: float | None = None,
         max_iters: int | None = None,
-        save_file: str | None = None,
-        continue_from_save: bool = False,
+        save: str | None = None,
+        resume: bool = False,
         max_depth: int | List[int] | None = None,
         one_tree_per_output: bool = False,
-        show_tb_progress: bool = False,
+        show_progress: bool = False,
     ):
         """
         Instantiate and validate an ApproxSynthesisConfig.
@@ -146,32 +152,23 @@ def __init__(
             If required parameters are missing or invalid.
         """
         self.method = _validate_method(method)
-        self.circuit_file = _validate_circuit_files(circuit_file)
-        self.dataset_file = _validate_dataset_files(
-            dataset_file, self.circuit_file, generate_dataset
-        )
+        self.circuit = _validate_circuits(circuit)
+        self.dataset = _validate_datasets(dataset, self.circuit, generate)
 
-        _validate_dataset_fraction_vs_generate_exclusivity(
-            generate_dataset, dataset_fraction
-        )
+        _validate_subset_vs_generate_exclusivity(generate, subset)
 
-        self.generate_dataset = _validate_generate_dataset(
-            generate_dataset, self.dataset_file
-        )
-        self.dataset_fraction = _validate_dataset_fraction(
-            dataset_fraction, self.dataset_file
-        )
+        self.generate = _validate_generate(generate, self.dataset)
+        self.subset = _validate_subset(subset, self.dataset)
 
         self.resynthesis = resynthesis
-        self.error_threshold = _validate_error_threshold(
-            error_threshold, self.method, self.resynthesis
-        )
+        self.error = _validate_error(error, self.method, self.resynthesis)
         self.max_iters = _validate_max_iters(max_iters, self.method, self.resynthesis)
-        self.max_depth = _validate_max_depth(max_depth, self.method, self.circuit_file)
-        self.save_file = _validate_save_file(save_file, continue_from_save)
-        self.continue_from_save = continue_from_save
+
+        self.max_depth = _validate_max_depth(max_depth, self.method, self.circuit)
+        self.save = _validate_save(save, resume)
+        self.resume = resume
         self.one_tree_per_output = one_tree_per_output
-        self.show_progress = show_tb_progress
+        self.show_progress = show_progress
 
     def __repr__(self):
         fields = ", ".join(f"{key}={value!r}" for key, value in self.__dict__.items())
@@ -199,11 +196,11 @@ def _validate_method(method: AlsMethod | str) -> AlsMethod:
     return method
 
 
-def _validate_circuit_files(circuits: str | List[str]) -> List[str]:
+def _validate_circuits(circuits: str | List[str]) -> List[str]:
     """
     Validates the existence of circuit files.
 
-    Checks if each file path in `circuit_file` exists. If not, raises a ValueError.
+    Checks if each file path in `circuit` exists. If not, raises a ValueError.
     Ensures input Verilog files are valid for further processing.
     """
     circuits = circuits if isinstance(circuits, list) else [circuits]
@@ -214,53 +211,49 @@ def _validate_circuit_files(circuits: str | List[str]) -> List[str]:
     return circuits
 
 
-def _validate_dataset_files(
-    dataset_files: str | List[str],
-    circuit_files: List[str],
-    generate_dataset: str | int | float | List[str | int | float] | None,
+def _validate_datasets(
+    datasets: str | List[str],
+    circuits: List[str],
+    generate: str | int | float | List[str | int | float] | None,
 ) -> List[str]:
     """
     Validates dataset files.
 
-    Ensures each dataset file exists, unless 'generate_dataset' is set.
+    Ensures each dataset file exists, unless 'generate' is set.
 
     Uses `_ensure_length_match` to validate correspondence with circuit files.
     Raises a ValueError if any required dataset file is missing.
     """
-    dataset_files = _ensure_length_match(
-        dataset_files, circuit_files, "dataset_file", "circuit_file"
-    )
-    if generate_dataset is None:
-        for f in dataset_files:
+    datasets = _ensure_length_match(datasets, circuits, "dataset", "circuit")
+    if generate is None:
+        for f in datasets:
             if not os.path.isfile(f):
                 raise ValueError(
-                    f"Dataset file not found: {f}. Use 'generate_dataset' or provide a valid file."
+                    f"Dataset file not found: {f}. Use 'generate' or provide a valid file."
                 )
-    return dataset_files
+    return datasets
 
 
-def _validate_dataset_fraction_vs_generate_exclusivity(
-    generate_dataset: str | int | float | List[str | int | float] | None,
-    dataset_fraction: int | float | List[int | float] | None,
+def _validate_subset_vs_generate_exclusivity(
+    generate: str | int | float | List[str | int | float] | None,
+    subset: int | float | List[int | float] | None,
 ):
     """
-    Ensures mutual exclusivity between 'generate_dataset' and 'dataset_fraction'.
+    Ensures mutual exclusivity between 'generate' and 'subset'.
 
     Both fields are incompatible; only one may be provided.
     Raises a ValueError if both are given.
     """
-    if generate_dataset is not None and dataset_fraction is not None:
-        raise ValueError(
-            "Cannot specify both 'generate_dataset' and 'dataset_fraction'."
-        )
+    if generate is not None and subset is not None:
+        raise ValueError("Cannot specify both 'generate' and 'subset'.")
 
 
-def _validate_generate_dataset(
-    generate_dataset: str | int | float | List[str | int | float] | None,
-    dataset_files: List[str],
+def _validate_generate(
+    generate: str | int | float | List[str | int | float] | None,
+    datasets: List[str],
 ) -> List[str | int | float] | None:
     """
-    Validates 'generate_dataset' parameters.
+    Validates 'generate' parameters.
 
     Ensures that each generation rule is valid:
     - str: must be 'exhaustive'
@@ -270,38 +263,36 @@ def _validate_generate_dataset(
     Uses `_ensure_length_match` to align with dataset files.
     Raises ValueError for invalid values.
     """
-    if generate_dataset is not None:
+    if generate is not None:
         gens = _ensure_length_match(
-            generate_dataset,
-            dataset_files,
-            "generate_dataset",
-            "dataset_file",
+            generate,
+            datasets,
+            "generate",
+            "dataset",
         )
         for g in gens:
             if isinstance(g, str):
                 if g != "exhaustive":
-                    raise ValueError(f"Invalid generate_dataset string: {g}")
+                    raise ValueError(f"Invalid generate string: {g}")
                 # TODO: Check number of inputs in circuit (must be <= 32)
             elif isinstance(g, int):
                 if g <= 0:
-                    raise ValueError("Integer generate_dataset must be > 0")
+                    raise ValueError("Integer generate must be > 0")
                 # TODO: Check it's <= max input count of the circuit
             elif isinstance(g, float):
                 if not (0 < g <= 1):
-                    raise ValueError(
-                        "Percentage generate_dataset must be between 0 and 1."
-                    )
+                    raise ValueError("Percentage generate must be between 0 and 1.")
             else:
-                raise ValueError(f"Invalid generate_dataset value: {g}")
+                raise ValueError(f"Invalid generate value: {g}")
         return gens
 
 
-def _validate_dataset_fraction(
-    dataset_fraction: int | float | List[int | float] | None,
-    dataset_files: List[str],
+def _validate_subset(
+    subset: int | float | List[int | float] | None,
+    datasets: List[str],
 ) -> List[int | float] | None:
     """
-    Validates 'dataset_fraction'.
+    Validates 'subset'.
 
     Ensures values are valid:
     - int: must be > 0
@@ -310,31 +301,29 @@ def _validate_dataset_fraction(
     Uses `_ensure_length_match` to align with dataset files.
     Raises ValueError for out-of-range values.
     """
-    if dataset_fraction is not None:
+    if subset is not None:
         fracs = _ensure_length_match(
-            dataset_fraction,
-            dataset_files,
-            "dataset_fraction",
-            "dataset_file",
+            subset,
+            datasets,
+            "subset",
+            "dataset",
         )
         for f in fracs:
             if isinstance(f, int) and f <= 0:
-                raise ValueError("dataset_fraction as int must be > 0")
+                raise ValueError("subset as int must be > 0")
             elif isinstance(f, float) and not (0 < f <= 1):
-                raise ValueError(
-                    "dataset_fraction as percentage must be between 0 and 1"
-                )
+                raise ValueError("subset as percentage must be between 0 and 1")
             # TODO: Check dataset size to ensure integer fraction < full dataset
         return fracs
 
 
-def _validate_error_threshold(
-    error_threshold: float | None,
+def _validate_error(
+    error: float | None,
     method: AlsMethod,
     resynthesis: bool,
 ) -> float | None:
     """
-    Validates 'error_threshold'.
+    Validates 'error'.
 
     Required for:
     - all iterative methods
@@ -343,18 +332,14 @@ def _validate_error_threshold(
     Raises ValueError if missing in those cases.
     """
     if method in _ITERATIVE_METHODS:
-        if error_threshold is None:
-            raise ValueError(f"'error_threshold' is required for method {method}")
+        if error is None:
+            raise ValueError(f"'error' is required for method {method}")
     elif (
-        method in _ITERATIVE_METHODS_WITH_RESYNTHESIS
-        and resynthesis
-        and error_threshold is None
+        method in _ITERATIVE_METHODS_WITH_RESYNTHESIS and resynthesis and error is None
     ):
-        raise ValueError(
-            f"'error_threshold' is required for method {method} with resynthesis"
-        )
+        raise ValueError(f"'error' is required for method {method} with resynthesis")
 
-    return error_threshold
+    return error
 
 
 def _validate_max_iters(
@@ -376,38 +361,50 @@ def _validate_max_iters(
 
 
 def _validate_max_depth(
-    max_depth: int | List[int] | None, method: AlsMethod, circuit_files: List[str]
-):
+    max_depth: int | List[int] | None, method: AlsMethod, circuits: List[str]
+) -> List[int] | None:
     """
     Validates 'max_depth' for decision trees.
 
     Ensures it is provided for the 'decision_tree' method and aligns with
     the number of circuit files if given as a list.
+
+    Ensures values are valid:
+    - int: must be > 1
+
     Raises ValueError if missing or mismatched.
     """
     if method == AlsMethod.DECISION_TREE:
         if max_depth is None:
             raise ValueError(f"'max_depth' is required for method f{method}.")
         else:
-            _ensure_length_match(max_depth, circuit_files, "max_depth", "circuit_file")
+            max_depths = _ensure_length_match(
+                max_depth,
+                circuits,
+                "max_depth",
+                "circuit",
+            )
+            for m in max_depths:
+                if m <= 0:
+                    raise ValueError("max_depth must be > 0")
+                # TODO: Check dataset size to ensure integer fraction < full dataset
+            return max_depths
 
 
-def _validate_save_file(save_file: str | None, continue_from_save: bool) -> str | None:
+def _validate_save(save: str | None, resume: bool) -> str | None:
     """
     Validates parameters for continuing from a saved file.
 
-    Ensures 'save_file' is provided and exists on disk when
-    'continue_from_save' is True. Raises ValueError otherwise.
+    Ensures 'save' is provided and exists on disk when
+    'resume' is True. Raises ValueError otherwise.
     """
-    if continue_from_save:
-        if save_file is None:
-            raise ValueError("To continue from save, 'save_file' must be provided.")
-        if not os.path.isfile(save_file):
-            raise ValueError(
-                f"To continue from save, 'save_file' must exist: {save_file}"
-            )
+    if resume:
+        if save is None:
+            raise ValueError("To continue from save, 'save' must be provided.")
+        if not os.path.isfile(save):
+            raise ValueError(f"To continue from save, 'save' must exist: {save}")
 
-    return save_file
+    return save
 
 
 def _ensure_length_match[T: str | int | float](
@@ -451,4 +448,4 @@ def _ensure_length_match[T: str | int | float](
         return values
     else:
         value: T = values
-        return [value for _ in ref_values]
+        return [value] * len(ref_values)

From 58a42e74c3f5013481f4bb1efffc6eac1b2f0bdc Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Wed, 7 May 2025 21:30:10 -0600
Subject: [PATCH 05/65] Simplify run config and add generate subcommand.

---
 __main__.py      | 236 ++++++++++++++++++-----------------
 circuit.py       |   4 +-
 circuiterror.py  |   3 +-
 configuration.py | 318 ++++++++---------------------------------------
 utils.py         |  10 +-
 5 files changed, 184 insertions(+), 387 deletions(-)

diff --git a/__main__.py b/__main__.py
index df2cb36..7fb00a6 100644
--- a/__main__.py
+++ b/__main__.py
@@ -1,13 +1,20 @@
 import argparse
-import os
-from typing import List
-from configuration import ApproxSynthesisConfig, AlsMethod
-
+from circuit import Circuit
+from configuration import ApproxSynthesisConfig, AlsMethod, Metric
+
+# The tech library is hardcoded for the following reasons:
+# - Ease of use: This way users don't have to provide a tech library which most
+#                of the time would be this same one.
+# - Tool limitations:
+#   - AxLS only provides this tech library.
+#   - The Circuit class accepts a tech library "name" and assumes a .v and .lib
+#     files by that name in the templates/ directory of AxLS exist.
+#   - If we want to let users provide custom tech libraries through optional
+#     flags we'll need to make Circuit accept arbitrary paths to the needed tech
+#     files.
+TECH="NanGate15nm"
 
 def parse_generate(value):
-    if value == "exhaustive":
-        return value
-
     try:
         return int(value)
     except ValueError:
@@ -15,146 +22,143 @@ def parse_generate(value):
             return float(value)
         except ValueError:
             raise argparse.ArgumentTypeError(
-                f"Invalid generate_dataset value: {value}. Must be int, float or 'exhaustive'."
+                f"Invalid generate_dataset value: {value}. Must be int or float."
             )
 
+def main():
+    parser = argparse.ArgumentParser(
+        description="AxLS CLI. Provides a simplified interface to the package's functionality."
+    )
 
-def parse_subset(value):
-    try:
-        return int(value)
-    except ValueError:
+    subparsers = parser.add_subparsers(
+        title="subcommands", dest="subcommand", required=True
+    )
+
+    run_parser = subparsers.add_parser(
+        "run",
+        help="Run Approximate Logic Synthesis on a circuit using one of the provided methods.",
+    )
+
+    run_arguments(run_parser)
+
+    generate_parser = subparsers.add_parser(
+        "generate", help="Generate a dataset that can be used with the 'run' command."
+    )
+
+    generate_arguments(generate_parser)
+
+    args = parser.parse_args()
+
+    if args.subcommand == "run":
         try:
-            return float(value)
-        except ValueError:
-            raise argparse.ArgumentTypeError(
-                f"Invalid generate_dataset value: {value}. Must be int or float."
+            config = ApproxSynthesisConfig(
+                method=args.method,
+                circuit=args.circuit,
+                dataset=args.dataset,
+                resynthesis=args.resynthesis,
+                error=args.error,
+                max_iters=args.max_iters,
+                max_depth=args.max_depth,
+                one_tree_per_output=args.one_tree_per_output,
+                show_progress=args.show_progress,
             )
+        except ValueError as e:
+            parser.error(str(e))
 
+        print("Configuration loaded successfully")
+        print(config)
 
-def main():
-    parser = argparse.ArgumentParser(
-        description="AxLS CLI. Can execute the different Approximate Logic Synthesis methods available with some configuration options."
-    )
-    parser.add_argument(
-        "--method",
-        required=True,
+    elif args.subcommand == "generate":
+        generate_dataset(args)
+
+
+def run_arguments(run_parser):
+    """
+    Adds the arguments to the 'run' subcommand parser
+    """
+
+    run_parser.add_argument(
+        "method",
         type=str,
         choices=[m.value for m in AlsMethod],
         help="Approximation method.",
     )
-    parser.add_argument(
-        "--circuit", required=True, nargs="+", help="Verilog circuit file(s)."
-    )
-    parser.add_argument("--dataset", required=True, nargs="+", help="Dataset file(s).")
-    parser.add_argument(
-        "--generate",
-        nargs="+",
-        type=parse_generate,
-        help="If provided, the dataset file(s) will be generated instead of looking for existing ones. This will overwrite any existing files. Accepts int (the amount of samples), float (between 0-1, a percentage of all the possible inputs) or 'exhaustive' (generate all possible inputs for a circuit, only usable for circuits with less than 32 input bits).",
-    )
-    parser.add_argument(
-        "--subset",
+    run_parser.add_argument("circuit", help="Verilog circuit file.")
+    run_parser.add_argument("dataset", help="Dataset file to run simulations with.")
+    run_parser.add_argument(
+        "metrics",
         nargs="+",
-        type=parse_subset,
-        help="If provided, will only use a subset of the existing dataset file(s). Accepts int (the amount of samples) or float (0 < x <= 1, a percentage of the available samples).",
+        choices=[m.value for m in Metric],
+        # TODO: Add docs about what each metric is
+        help="Metrics to calculate, at least one must be given.",
     )
-    parser.add_argument(
+    run_parser.add_argument(
         "--resynthesis", action="store_true", help="If provided will use resynthesis."
     )
-    parser.add_argument(
+    run_parser.add_argument(
         "--error",
         type=float,
         help="Maximum error threshold to stop iterations. (0 < x <= 1).",
     )
-    parser.add_argument(
+    run_parser.add_argument(
         "--max-iters",
         type=int,
         help="Maximum number of iterations for iterative methods.",
     )
-    parser.add_argument(
-        "--save",
-        type=str,
-        help="Path to a file to save the run's configuration and progress.",
-    )
-    parser.add_argument(
-        "--resume",
-        action="store_true",
-        help="If provided, will resume from the existing save file.",
+    run_parser.add_argument(
+        "--max-depth", type=int, help="Max depth for decision_tree method"
     )
-    parser.add_argument(
-        "--max-depth", nargs="+", type=int, help="Max depth for decision_tree method"
-    )
-    parser.add_argument(
+    run_parser.add_argument(
         "--one-tree-per-output",
         action="store_true",
         help="Use one tree per output for decision_tree",
     )
-    parser.add_argument(
+    run_parser.add_argument(
         "--show-progress", action="store_true", help="Show simulation progress"
     )
-    args = parser.parse_args()
+    run_parser.add_argument(
+        "--csv",
+        type=str,
+        help="""Path to a file to save the output in csv format.
+        If the file doesn't exist, it will be created, if it exists it will be appended to.
+        The output will be given as a single line with the following columns:
+            method, circuit, flag1, flag2, ...,  metric1, metric2, ...""",
+    )
 
-    # number of circuits
-    n = len(args.circuit)
-
-    def check_list[T](name: str, lst: List[T]) -> T | List[T]:
-        ln = len(lst)
-        if ln == 1:
-            return lst[0]
-        elif ln == n:
-            return lst
-        else:
-            if n == 1:
-                parser.error(
-                    f"--{name} only accepts 1 value when only 1 circuit is given, got {ln} values"
-                )
-            else:
-                parser.error(f"--{name} requires 1 or {n} values, got {ln}")
-
-    # align lists
-    args.circuit = check_list("circuit", args.circuit)
-    args.dataset = check_list("dataset", args.dataset)
-
-    if args.generate:
-        args.generate = check_list("generate", args.generate)
-    if args.subset:
-        args.subset = check_list("subset", args.subset)
-
-    if args.max_depth:
-        args.max_depth = check_list("max-depth", args.max_depth)
-
-    # exclusivity checks
-    if args.generate and args.subset:
-        parser.error("Cannot specify both --generate and --subset")
-
-    if args.resume:
-        if not args.save:
-            parser.error("--save is required when --resume is set")
-        if not os.path.isfile(args.save):
-            parser.error(f"Save file does not exist: {args.save}")
-
-    # instantiate config
-    try:
-        config = ApproxSynthesisConfig(
-            method=args.method,
-            circuit=args.circuit,
-            dataset=args.dataset,
-            generate=args.generate,
-            subset=args.subset,
-            resynthesis=args.resynthesis,
-            error=args.error,
-            max_iters=args.max_iters,
-            save=args.save,
-            resume=args.resume,
-            max_depth=args.max_depth,
-            one_tree_per_output=args.one_tree_per_output,
-            show_progress=args.show_progress,
-        )
-    except ValueError as e:
-        parser.error(str(e))
-
-    print("Configuration loaded successfully")
-    print(config)
+
+def generate_arguments(generate_parser):
+    """
+    Adds the 'circuit', 'dataset' and 'size' positional arguments to the 'generate' subcommand parser.
+    """
+    generate_parser.add_argument("circuit", help="Verilog circuit file.")
+    generate_parser.add_argument("dataset", help="Dataset file to generate.")
+    generate_parser.add_argument(
+        "size",
+        type=parse_generate,
+        help="""The size of the dataset.
+        Accepts an int (x > 0, a set amount of samples), or a float (0 < x <= 1, a fraction of the total amount of inputs possible).
+        Note that for big circuits, like those with 32 input bits or more, generating a large fraction of the possible inputs might take a long time, due to the amount of possible inputs growing exponentially (2^n).
+        """,
+    )
+
+def generate_dataset(args: argparse.Namespace):
+    circuit = Circuit(args.circuit, TECH)
+
+    size = args.size
+    if isinstance(size, int):
+        if not size > 0:
+            raise argparse.ArgumentTypeError(
+                f"Dataset size must be greater than 0: {size}")
+
+    if isinstance(size, float):
+        if not (0 < size <= 1.0):
+            raise argparse.ArgumentTypeError(
+                f"Dataset size must be greater than 0: {size}")
+
+        max_inputs = 2**(len(circuit.inputs))
+        size = round(max_inputs*size)
+
+    circuit.generate_dataset(args.dataset, size)
 
 
 if __name__ == "__main__":
diff --git a/circuit.py b/circuit.py
index a3fc842..8750900 100644
--- a/circuit.py
+++ b/circuit.py
@@ -55,8 +55,8 @@ def __init__(self, rtl, tech, saif = ""):
         self.rtl_file = rtl
         self.tech_file = tech
         self.topmodule = rtl.split('/')[-1].replace(".v","")
-        self.netl_file = synthesis (rtl, tech, self.topmodule)
-        self.technology = Technology(tech)
+        self.netl_file = synthesis (rtl, self.tech_file, self.topmodule)
+        self.technology = Technology(self.tech_file)
         # extract the usefull attributes of netlist
         netlist = Netlist(self.netl_file, self.technology)
         self.netl_root = netlist.root
diff --git a/circuiterror.py b/circuiterror.py
index 822b84c..ea4b329 100644
--- a/circuiterror.py
+++ b/circuiterror.py
@@ -1,3 +1,4 @@
+from enum import Enum
 import numpy as np
 
 def extract_numbers(filename):
@@ -33,7 +34,7 @@ def compute_error(metric, original, approximate):
     ----------
     metric : string
         equation to measure the error
-        options med, wce, wcre,mred, msed
+        options med, wce, mred, msed
     original : string
         path to the original results text file
     approximate : string
diff --git a/configuration.py b/configuration.py
index beefd27..6b48e5f 100644
--- a/configuration.py
+++ b/configuration.py
@@ -1,8 +1,8 @@
-from typing import List
 import os
 
 from enum import Enum
 
+from circuit import Circuit
 
 class AlsMethod(str, Enum):
     CONSTANT_INPUTS = "inconst"
@@ -18,6 +18,13 @@ def __repr__(self):
     def __str__(self):
         return self.value
 
+class Metric(str, Enum):
+    MEAN_ERROR_DISTANCE = "med"
+    WORST_CASE_ERROR = "wce"
+    MEAN_RELATIVE_ERROR_DISTANCE = "mred"
+    MEAN_SQUARED_ERROR_DISTANCE = "msed"
+    ALS_TIME = "time"
+
 
 # List of iterative methods.
 _ITERATIVE_METHODS = [
@@ -43,39 +50,12 @@ class ApproxSynthesisConfig:
         following string names: 'inconst', 'outconst', 'probrun',
         'significance', 'ccarving', or 'decision_tree'.
 
-    circuit : str | List[str]
-        Path(s) to the Verilog circuit file(s).
-
-    dataset : str | List[str]
-        Path(s) to the dataset file(s).
-
-        If multiple circuits were specified with `circuit`, this can be
-        given as a list which must match the `circuit` list length.
-        Each dataset will be used with each corresponding circuit.
-
-    generate : str | int | float | List[str | int | float], optional
-        If specified, the dataset file(s) given with dataset will be
-        generated on the spot. Overwriting any existing file(s).
-
-        Rule(s) to generate dataset. Can be:
-        - 'exhaustive' (only for circuits with number of inputs <= 32)
-        - an integer number of inputs
-        - a float percentage (0 < x <= 1)
-
-        If multiple datasets were specified with `dataset`, this option can
-        be given as a list which must match the `dataset` list length.
-        Each dataset will be generated with the corresponding settings.
-
-    subset : int | float | List[int | float], optional
-        Subset of dataset to use. Cannot be used with `generate`.
+    circuit : Circuit
+        A synthesized RTL circuit. See the circuit module.
 
-        Can be given as:
-        - an integer number of inputs (lower than dataset size)
-        - a float percentage (0 < x <= 1)
-
-        If multiple datasets were specified with `dataset`, this option
-        can be given as a list which must match the `dataset` list length.
-        Each dataset will be generated with the corresponding settings.
+    dataset : str
+        Path to the dataset file.
+        TODO: Document dataset file format.
 
     resynthesis : bool, default=False
         Whether to use resynthesis.
@@ -88,18 +68,8 @@ class ApproxSynthesisConfig:
         Maximum amount of iterations to execute. Used in iterative methods,
         like pruning methods or ML methods with resynthesis.
 
-    save : str, optional
-        Path to file where configuration and progress of the run is saved.
-
-    resume : bool, default=False
-        Whether to continue from a saved file. If True, `save` option must
-        be provided. If the file doesn't exist, a new run will be started.
-
-    max_depth : int | List[int]
-        Required for 'decision_tree'. Can be a single int or list if multiple
-        circuits are specified. In which case the list must match the
-        `circuit` list length. Each circuit will use the corresponding max
-        depth for its decision tree.
+    max_depth : int
+        Required for 'decision_tree'.
 
     one_tree_per_output : bool, default=False
         Used only by 'decision_tree' method.
@@ -110,38 +80,27 @@ class ApproxSynthesisConfig:
         Whether to show simulation progress.
     """
 
-    # After instantiation/validation the method is always turned into an
-    # AlsMethod and anything that can be a list is turned into a list. For ease
-    # of use of this struct in the runner code.
     method: AlsMethod
-    circuit: List[str]
-    dataset: List[str]
-    generate: List[str | int | float] | None
-    subset: List[int | float] | None
+    circuit: Circuit
+    dataset: str
     resynthesis: bool
     error: float | None
     max_iters: int | None
-    save: str | None
-    resume: bool
-    max_depth: List[int] | None
+    max_depth: int | None
     one_tree_per_output: bool
     show_progress: bool
 
     def __init__(
         self,
-        method: AlsMethod | str,
-        circuit: str | List[str],
-        dataset: str | List[str],
-        generate: str | int | float | List[str | int | float] | None = None,
-        subset: int | float | List[int | float] | None = None,
-        resynthesis: bool = False,
-        error: float | None = None,
-        max_iters: int | None = None,
-        save: str | None = None,
-        resume: bool = False,
-        max_depth: int | List[int] | None = None,
-        one_tree_per_output: bool = False,
-        show_progress: bool = False,
+        method: AlsMethod,
+        circuit: Circuit,
+        dataset: str,
+        resynthesis: bool,
+        error: float | None,
+        max_iters: int | None,
+        max_depth: int | None,
+        one_tree_per_output: bool,
+        show_progress: bool,
     ):
         """
         Instantiate and validate an ApproxSynthesisConfig.
@@ -152,21 +111,14 @@ def __init__(
             If required parameters are missing or invalid.
         """
         self.method = _validate_method(method)
-        self.circuit = _validate_circuits(circuit)
-        self.dataset = _validate_datasets(dataset, self.circuit, generate)
-
-        _validate_subset_vs_generate_exclusivity(generate, subset)
-
-        self.generate = _validate_generate(generate, self.dataset)
-        self.subset = _validate_subset(subset, self.dataset)
+        self.circuit = circuit
+        self.dataset = _validate_dataset(dataset)
 
         self.resynthesis = resynthesis
         self.error = _validate_error(error, self.method, self.resynthesis)
         self.max_iters = _validate_max_iters(max_iters, self.method, self.resynthesis)
 
-        self.max_depth = _validate_max_depth(max_depth, self.method, self.circuit)
-        self.save = _validate_save(save, resume)
-        self.resume = resume
+        self.max_depth = _validate_max_depth(max_depth, self.method)
         self.one_tree_per_output = one_tree_per_output
         self.show_progress = show_progress
 
@@ -196,125 +148,20 @@ def _validate_method(method: AlsMethod | str) -> AlsMethod:
     return method
 
 
-def _validate_circuits(circuits: str | List[str]) -> List[str]:
+def _validate_dataset(
+    dataset: str,
+) -> str:
     """
-    Validates the existence of circuit files.
+    Ensures the dataset file exists.
+    TODO: We could maybe validate that the values match up with the circuit's inputs.
 
-    Checks if each file path in `circuit` exists. If not, raises a ValueError.
-    Ensures input Verilog files are valid for further processing.
+    Raises a ValueError if the dataset file is missing. Matching against the
+    circuit inputs is not checked yet.
     """
-    circuits = circuits if isinstance(circuits, list) else [circuits]
-    for circuit in circuits:
-        if not os.path.isfile(circuit):
-            raise ValueError(f"Circuit file does not exist: {circuit}")
-
-    return circuits
+    if not os.path.isfile(dataset):
+        raise ValueError(f"Dataset file not found: {dataset}..")
 
-
-def _validate_datasets(
-    datasets: str | List[str],
-    circuits: List[str],
-    generate: str | int | float | List[str | int | float] | None,
-) -> List[str]:
-    """
-    Validates dataset files.
-
-    Ensures each dataset file exists, unless 'generate' is set.
-
-    Uses `_ensure_length_match` to validate correspondence with circuit files.
-    Raises a ValueError if any required dataset file is missing.
-    """
-    datasets = _ensure_length_match(datasets, circuits, "dataset", "circuit")
-    if generate is None:
-        for f in datasets:
-            if not os.path.isfile(f):
-                raise ValueError(
-                    f"Dataset file not found: {f}. Use 'generate' or provide a valid file."
-                )
-    return datasets
-
-
-def _validate_subset_vs_generate_exclusivity(
-    generate: str | int | float | List[str | int | float] | None,
-    subset: int | float | List[int | float] | None,
-):
-    """
-    Ensures mutual exclusivity between 'generate' and 'subset'.
-
-    Both fields are incompatible; only one may be provided.
-    Raises a ValueError if both are given.
-    """
-    if generate is not None and subset is not None:
-        raise ValueError("Cannot specify both 'generate' and 'subset'.")
-
-
-def _validate_generate(
-    generate: str | int | float | List[str | int | float] | None,
-    datasets: List[str],
-) -> List[str | int | float] | None:
-    """
-    Validates 'generate' parameters.
-
-    Ensures that each generation rule is valid:
-    - str: must be 'exhaustive'
-    - int: must be > 0
-    - float: must be in (0, 1]
-
-    Uses `_ensure_length_match` to align with dataset files.
-    Raises ValueError for invalid values.
-    """
-    if generate is not None:
-        gens = _ensure_length_match(
-            generate,
-            datasets,
-            "generate",
-            "dataset",
-        )
-        for g in gens:
-            if isinstance(g, str):
-                if g != "exhaustive":
-                    raise ValueError(f"Invalid generate string: {g}")
-                # TODO: Check number of inputs in circuit (must be <= 32)
-            elif isinstance(g, int):
-                if g <= 0:
-                    raise ValueError("Integer generate must be > 0")
-                # TODO: Check it's <= max input count of the circuit
-            elif isinstance(g, float):
-                if not (0 < g <= 1):
-                    raise ValueError("Percentage generate must be between 0 and 1.")
-            else:
-                raise ValueError(f"Invalid generate value: {g}")
-        return gens
-
-
-def _validate_subset(
-    subset: int | float | List[int | float] | None,
-    datasets: List[str],
-) -> List[int | float] | None:
-    """
-    Validates 'subset'.
-
-    Ensures values are valid:
-    - int: must be > 0
-    - float: must be in (0, 1]
-
-    Uses `_ensure_length_match` to align with dataset files.
-    Raises ValueError for out-of-range values.
-    """
-    if subset is not None:
-        fracs = _ensure_length_match(
-            subset,
-            datasets,
-            "subset",
-            "dataset",
-        )
-        for f in fracs:
-            if isinstance(f, int) and f <= 0:
-                raise ValueError("subset as int must be > 0")
-            elif isinstance(f, float) and not (0 < f <= 1):
-                raise ValueError("subset as percentage must be between 0 and 1")
-            # TODO: Check dataset size to ensure integer fraction < full dataset
-        return fracs
+    return dataset
 
 
 def _validate_error(
@@ -348,10 +195,13 @@ def _validate_max_iters(
     """
     Validates 'max_iters'.
 
-    Required only for methods that become iterative under resynthesis,
-    like decision_tree; because they might never reach the error threshold.
+    Required for iterative methods.
     Raises ValueError if missing in that case.
     """
+
+    if method in _ITERATIVE_METHODS:
+        if max_iters is None:
+            raise ValueError(f"'max_iters' is required for method {method}")
     if (
         method in _ITERATIVE_METHODS_WITH_RESYNTHESIS
         and resynthesis
@@ -361,8 +211,9 @@ def _validate_max_iters(
 
 
 def _validate_max_depth(
-    max_depth: int | List[int] | None, method: AlsMethod, circuits: List[str]
-) -> List[int] | None:
+    max_depth: int | None,
+    method: AlsMethod,
+) -> int | None:
     """
     Validates 'max_depth' for decision trees.
 
@@ -378,74 +229,7 @@ def _validate_max_depth(
         if max_depth is None:
             raise ValueError(f"'max_depth' is required for method f{method}.")
         else:
-            max_depths = _ensure_length_match(
-                max_depth,
-                circuits,
-                "max_depth",
-                "circuit",
-            )
-            for m in max_depths:
-                if m <= 0:
-                    raise ValueError("max_depth must be > 0")
-                # TODO: Check dataset size to ensure integer fraction < full dataset
-            return max_depths
-
-
-def _validate_save(save: str | None, resume: bool) -> str | None:
-    """
-    Validates parameters for continuing from a saved file.
+            if max_depth <= 1:
+                raise ValueError("max_depth must be > 1")
 
-    Ensures 'save' is provided and exists on disk when
-    'resume' is True. Raises ValueError otherwise.
-    """
-    if resume:
-        if save is None:
-            raise ValueError("To continue from save, 'save' must be provided.")
-        if not os.path.isfile(save):
-            raise ValueError(f"To continue from save, 'save' must exist: {save}")
-
-    return save
-
-
-def _ensure_length_match[T: str | int | float](
-    values: T | List[T],
-    ref_values: List[str],
-    field_name: str,
-    ref_name: str,
-) -> List[T]:
-    """
-    Ensures list parameters match reference list length.
-
-    Converts scalar to list and compares length if both are lists.
-    Raises ValueError if mismatched or list is given with scalar reference.
-
-    When converting scalar to list, the list returned contains N copies of the
-    scalar value, where N is the length of the reference list.
-    This is done so that the config object ends up with only lists that all are
-    of equal length, for easy handling in the runner code.
-
-    Parameters
-    ----------
-    values : Any
-        Parameter to validate (scalar or list).
-    ref_values : Any
-        Reference parameter (scalar or list).
-    field_name : str
-        Name of the field being validated (for error messages).
-    ref_name : str
-        Name of the reference field (for error messages).
-
-    Returns
-    -------
-    List[Any]
-        The validated values, always as a list.
-    """
-    if isinstance(values, list):
-        if len(values) != len(ref_values):
-            raise ValueError(
-                f"'{field_name}' length ({len(values)}) must match {ref_name} count ({len(ref_values)})"
-            )
-        return values
-    else:
-        value: T = values
-        return [value] * len(ref_values)
+            return max_depth
diff --git a/utils.py b/utils.py
index 5b950f0..1154106 100644
--- a/utils.py
+++ b/utils.py
@@ -2,6 +2,7 @@
 from datetime import datetime
 import random
 import string
+from typing import List
 import numpy as np
 import math
 from random import uniform, gauss, triangular
@@ -79,7 +80,7 @@ def get_random(bits: int, distribution='uniform', samples=1, **kwargs):
 
     return data
 
-def read_dataset(filename, base, max_lines=None):
+def read_dataset(filename: str, base: int, max_lines: None | int =None) -> List[List[int]]:
     """
     Reads a dataset or circuit output file like those generated by the
     `Circuit.generate_dataset` file or `Circuit.exact_output`.
@@ -96,6 +97,13 @@ def read_dataset(filename, base, max_lines=None):
     max_lines : None | int
         The maximum amount of lines to read, in case the user doesn't want to
         use the entire dataset.
+
+    Returns
+    ----------
+    dataset : List[List[int]]
+        The data read from the file. The returned list is ordered by rows first,
+        then columns. For example, dataset[2][5] is the element at row index 2,
+        column index 5 (zero-based).
     """
     with open(filename, "r") as f:
         if max_lines is not None:

From 27629fdffb60290325d34977ed8876f590d1b3c5 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Wed, 7 May 2025 23:42:38 -0600
Subject: [PATCH 06/65] Add metrics to configuration class and add a default
 metric.

---
 __main__.py      | 28 ++++++++++++++++++++--------
 configuration.py | 41 +++++++++++++++++++++++++++++++++++++++--
 2 files changed, 59 insertions(+), 10 deletions(-)

diff --git a/__main__.py b/__main__.py
index 7fb00a6..c862934 100644
--- a/__main__.py
+++ b/__main__.py
@@ -1,4 +1,5 @@
 import argparse
+from typing import List
 from circuit import Circuit
 from configuration import ApproxSynthesisConfig, AlsMethod, Metric
 
@@ -12,7 +13,8 @@
 #   - If we want to let users provide custom tech libraries through optional
 #     flags we'll need to make Circuit accept arbitrary paths to the needed tech
 #     files.
-TECH="NanGate15nm"
+TECH = "NanGate15nm"
+
 
 def parse_generate(value):
     try:
@@ -25,6 +27,7 @@ def parse_generate(value):
                 f"Invalid generate_dataset value: {value}. Must be int or float."
             )
 
+
 def main():
     parser = argparse.ArgumentParser(
         description="AxLS CLI. Provides a simplified interface to the package's functionality."
@@ -50,10 +53,16 @@ def main():
     args = parser.parse_args()
 
     if args.subcommand == "run":
+        metrics: List[str] = args.metrics
+        if len(metrics) == 0:
+            metrics = [ Metric.MEAN_RELATIVE_ERROR_DISTANCE ]
+
         try:
+            circuit = Circuit(args.circuit, TECH)
             config = ApproxSynthesisConfig(
                 method=args.method,
-                circuit=args.circuit,
+                circuit=circuit,
+                metrics=metrics,  # defaulted above; args.metrics may be empty
                 dataset=args.dataset,
                 resynthesis=args.resynthesis,
                 error=args.error,
@@ -87,10 +96,10 @@ def run_arguments(run_parser):
     run_parser.add_argument("dataset", help="Dataset file to run simulations with.")
     run_parser.add_argument(
         "metrics",
-        nargs="+",
+        nargs="*",
         choices=[m.value for m in Metric],
         # TODO: Add docs about what each metric is
-        help="Metrics to calculate, at least one must be given.",
+        help="Metrics to calculate, defaults to mred.",
     )
     run_parser.add_argument(
         "--resynthesis", action="store_true", help="If provided will use resynthesis."
@@ -141,6 +150,7 @@ def generate_arguments(generate_parser):
         """,
     )
 
+
 def generate_dataset(args: argparse.Namespace):
     circuit = Circuit(args.circuit, TECH)
 
@@ -148,15 +158,17 @@ def generate_dataset(args: argparse.Namespace):
     if isinstance(size, int):
         if not size > 0:
             raise argparse.ArgumentTypeError(
-                f"Dataset size must be greater than 0: {size}")
+                f"Dataset size must be greater than 0: {size}"
+            )
 
     if isinstance(size, float):
         if not (0 < size <= 1.0):
             raise argparse.ArgumentTypeError(
-                f"Dataset size must be greater than 0: {size}")
+                f"Dataset size as a fraction must be in (0, 1]: {size}"
+            )
 
-        max_inputs = 2**(len(circuit.inputs))
-        size = round(max_inputs*size)
+        max_inputs = 2 ** (len(circuit.inputs))
+        size = round(max_inputs * size)
 
     circuit.generate_dataset(args.dataset, size)
 
diff --git a/configuration.py b/configuration.py
index 6b48e5f..b255a70 100644
--- a/configuration.py
+++ b/configuration.py
@@ -1,9 +1,11 @@
 import os
 
 from enum import Enum
+from typing import List
 
 from circuit import Circuit
 
+
 class AlsMethod(str, Enum):
     CONSTANT_INPUTS = "inconst"
     CONSTANT_OUTPUTS = "outconst"
@@ -18,6 +20,7 @@ def __repr__(self):
     def __str__(self):
         return self.value
 
+
 class Metric(str, Enum):
     MEAN_ERROR_DISTANCE = "med"
     WORST_CASE_ERROR = "wce"
@@ -45,7 +48,7 @@ class ApproxSynthesisConfig:
 
     Parameters
     ----------
-    method : AlsMethod
+    method : AlsMethod | str
         One of the supported methods. Can use the AlsMethod enum or one of the
         following string names: 'inconst', 'outconst', 'probrun',
         'significance', 'ccarving', or 'decision_tree'.
@@ -57,6 +60,9 @@ class ApproxSynthesisConfig:
         Path to the dataset file.
         TODO: Document dataset file format.
 
+    metrics : List[str]
+        Metrics to calculate for the execution.
+
     resynthesis : bool, default=False
         Whether to use resynthesis.
 
@@ -83,6 +89,7 @@ class ApproxSynthesisConfig:
     method: AlsMethod
     circuit: Circuit
     dataset: str
+    metrics: List[Metric]
     resynthesis: bool
     error: float | None
     max_iters: int | None
@@ -92,9 +99,10 @@ class ApproxSynthesisConfig:
 
     def __init__(
         self,
-        method: AlsMethod,
+        method: AlsMethod | str,
         circuit: Circuit,
         dataset: str,
+        metrics: List[Metric | str],
         resynthesis: bool,
         error: float | None,
         max_iters: int | None,
@@ -113,6 +121,7 @@ def __init__(
         self.method = _validate_method(method)
         self.circuit = circuit
         self.dataset = _validate_dataset(dataset)
+        self.metrics = _validate_metrics(metrics)
 
         self.resynthesis = resynthesis
         self.error = _validate_error(error, self.method, self.resynthesis)
@@ -148,6 +157,34 @@ def _validate_method(method: AlsMethod | str) -> AlsMethod:
     return method
 
 
+def _validate_metrics(metrics: List[str | Metric]) -> List[Metric]:
+    """
+    Validates the metrics.
+
+    If a metric is given as a string, this function tries to convert it to a
+    Metric enum.
+    Raises a ValueError if the metric name is invalid.
+
+    Ensures consistency for downstream logic by enforcing enum usage.
+    """
+
+    result_metrics: List[Metric] = []
+
+    for metric in metrics:
+        if isinstance(metric, str):
+            try:
+                result_metrics.append(Metric(metric))
+            except ValueError:
+                available_metrics = ", ".join([metric.value for metric in Metric])
+                raise ValueError(
+                    f"{metric} is not a valid {Metric.__name__}. Available metrics are: {available_metrics}"
+                )
+        else:
+            result_metrics.append(metric)
+
+    return result_metrics
+
+
 def _validate_dataset(
     dataset: str,
 ) -> str:

From 63e97fe8c92148a985d4787cfbce68bac21da52c Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 8 May 2025 00:01:13 -0600
Subject: [PATCH 07/65] Document that the error threshold is measured in mean
 relative error distance

---
 __main__.py      | 2 +-
 configuration.py | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/__main__.py b/__main__.py
index c862934..2ed89a8 100644
--- a/__main__.py
+++ b/__main__.py
@@ -107,7 +107,7 @@ def run_arguments(run_parser):
     run_parser.add_argument(
         "--error",
         type=float,
-        help="Maximum error threshold to stop iterations. (0 < x <= 1).",
+        help="Maximum error threshold to stop iterations. (0 < x <= 1). The error used is Mean Relative Error Distance.",
     )
     run_parser.add_argument(
         "--max-iters",
diff --git a/configuration.py b/configuration.py
index b255a70..7a6101e 100644
--- a/configuration.py
+++ b/configuration.py
@@ -70,6 +70,8 @@ class ApproxSynthesisConfig:
         The maximum error threshold permitted. Required for iterative methods,
         like pruning methods or ML methods with resynthesis.
 
+        The error used is the Mean Relative Error Distance.
+
     max_iters : int, optional
         Maximum amount of iterations to execute. Used in iterative methods,
         like pruning methods or ML methods with resynthesis.

From 16d09fc869e3075f718fa46e127a46402f9f88cf Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 8 May 2025 11:44:26 -0600
Subject: [PATCH 08/65] Fix lint warnings for configuration

---
 configuration.py | 63 ++++++++++++++++++++++++------------------------
 1 file changed, 31 insertions(+), 32 deletions(-)

diff --git a/configuration.py b/configuration.py
index 7a6101e..826b846 100644
--- a/configuration.py
+++ b/configuration.py
@@ -1,7 +1,7 @@
 import os
 
 from enum import Enum
-from typing import List
+from typing import override
 
 from circuit import Circuit
 
@@ -14,9 +14,11 @@ class AlsMethod(str, Enum):
     CCARVING = "ccarving"
     DECISION_TREE = "decision_tree"
 
+    @override
     def __repr__(self):
         return self.value
 
+    @override
     def __str__(self):
         return self.value
 
@@ -60,13 +62,13 @@ class ApproxSynthesisConfig:
         Path to the dataset file.
         TODO: Document dataset file format.
 
-    metrics : List[str]
+    metrics : list[Metric | str]
         Metrics to calculate for the execution.
 
     resynthesis : bool, default=False
         Whether to use resynthesis.
 
-    error : float (0 < x <= 1)
+    error : float (0 < x <= 1), optional
         The maximum error threshold permitted. Required for iterative methods,
         like pruning methods or ML methods with resynthesis.
 
@@ -76,7 +78,7 @@ class ApproxSynthesisConfig:
         Maximum amount of iterations to execute. Used in iterative methods,
         like pruning methods or ML methods with resynthesis.
 
-    max_depth : int
+    max_depth : int, optional
         Required for 'decision_tree'.
 
     one_tree_per_output : bool, default=False
@@ -91,7 +93,7 @@ class ApproxSynthesisConfig:
     method: AlsMethod
     circuit: Circuit
     dataset: str
-    metrics: List[Metric]
+    metrics: list[Metric]
     resynthesis: bool
     error: float | None
     max_iters: int | None
@@ -104,13 +106,13 @@ def __init__(
         method: AlsMethod | str,
         circuit: Circuit,
         dataset: str,
-        metrics: List[Metric | str],
-        resynthesis: bool,
-        error: float | None,
-        max_iters: int | None,
-        max_depth: int | None,
-        one_tree_per_output: bool,
-        show_progress: bool,
+        metrics: list[Metric | str],
+        resynthesis: bool = False,
+        error: float | None = None,
+        max_iters: int | None = None,
+        max_depth: int | None = None,
+        one_tree_per_output: bool = False,
+        show_progress: bool = False,
     ):
         """
         Instantiate and validate an ApproxSynthesisConfig.
@@ -133,6 +135,7 @@ def __init__(
         self.one_tree_per_output = one_tree_per_output
         self.show_progress = show_progress
 
+    @override
     def __repr__(self):
         fields = ", ".join(f"{key}={value!r}" for key, value in self.__dict__.items())
         return f"{self.__class__.__name__}({fields})"
@@ -147,19 +150,18 @@ def _validate_method(method: AlsMethod | str) -> AlsMethod:
 
     Ensures consistency for downstream logic by enforcing enum usage.
     """
-    if isinstance(method, str):
-        try:
-            method = AlsMethod(method)
-        except ValueError:
-            available_methods = ", ".join([method.value for method in AlsMethod])
-            raise ValueError(
-                f"{method} is not a valid {AlsMethod.__name__}. Available methods are: {available_methods}"
-            )
+    try:
+        method = AlsMethod(method)
+    except ValueError:
+        available_methods = ", ".join([method.value for method in AlsMethod])
+        raise ValueError(
+            f"{method} is not a valid {AlsMethod.__name__}. Available methods are: {available_methods}"
+        )
 
     return method
 
 
-def _validate_metrics(metrics: List[str | Metric]) -> List[Metric]:
+def _validate_metrics(metrics: list[str | Metric]) -> list[Metric]:
     """
     Validates the metrics.
 
@@ -170,19 +172,16 @@ def _validate_metrics(metrics: List[str | Metric]) -> List[Metric]:
     Ensures consistency for downstream logic by enforcing enum usage.
     """
 
-    result_metrics: List[Metric] = []
+    result_metrics: list[Metric] = []
 
     for metric in metrics:
-        if isinstance(metric, str):
-            try:
-                result_metrics.append(Metric(metric))
-            except ValueError:
-                available_metrics = ", ".join([metric.value for metric in Metric])
-                raise ValueError(
-                    f"{metric} is not a valid {Metric.__name__}. Available metrics are: {available_metrics}"
-                )
-        else:
-            result_metrics.append(metric)
+        try:
+            result_metrics.append(Metric(metric))
+        except ValueError:
+            available_metrics = ", ".join([metric.value for metric in Metric])
+            raise ValueError(
+                f"{metric} is not a valid {Metric.__name__}. Available metrics are: {available_metrics}"
+            )
 
     return result_metrics
 

From a26465b4c685b5e7e5c3d78c5adfca1a7b2132b1 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 8 May 2025 11:44:45 -0600
Subject: [PATCH 09/65] Add hamming distance metric

---
 configuration.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/configuration.py b/configuration.py
index 826b846..de6c69a 100644
--- a/configuration.py
+++ b/configuration.py
@@ -24,6 +24,7 @@ def __str__(self):
 
 
 class Metric(str, Enum):
+    HAMMING_DISTANCE = "hd"
     MEAN_ERROR_DISTANCE = "med"
     WORST_CASE_ERROR = "wce"
     MEAN_RELATIVE_ERROR_DISTANCE = "mred"

From 1aca2fad11be7eed3f8376406ce65640a44a32ad Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 8 May 2025 11:46:31 -0600
Subject: [PATCH 10/65] Add csv handling to the configuration class

---
 __main__.py      | 15 ++++++++----
 configuration.py | 59 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+), 5 deletions(-)

diff --git a/__main__.py b/__main__.py
index 2ed89a8..11c01bd 100644
--- a/__main__.py
+++ b/__main__.py
@@ -1,5 +1,4 @@
 import argparse
-from typing import List
 from circuit import Circuit
 from configuration import ApproxSynthesisConfig, AlsMethod, Metric
 
@@ -53,9 +52,9 @@ def main():
     args = parser.parse_args()
 
     if args.subcommand == "run":
-        metrics: List[str] = args.metrics
+        metrics: list[str] = args.metrics
         if len(metrics) == 0:
-            metrics = [ Metric.MEAN_RELATIVE_ERROR_DISTANCE ]
+            metrics = [Metric.MEAN_RELATIVE_ERROR_DISTANCE, Metric.ALS_TIME]
 
         try:
             circuit = Circuit(args.circuit, TECH)
@@ -70,6 +69,7 @@ def main():
                 max_depth=args.max_depth,
                 one_tree_per_output=args.one_tree_per_output,
                 show_progress=args.show_progress,
+                csv=args.csv,
             )
         except ValueError as e:
             parser.error(str(e))
@@ -99,7 +99,7 @@ def run_arguments(run_parser):
         nargs="*",
         choices=[m.value for m in Metric],
         # TODO: Add docs about what each metric is
-        help="Metrics to calculate, defaults to mred.",
+        help="Metrics to calculate, defaults to mred and time.",
     )
     run_parser.add_argument(
         "--resynthesis", action="store_true", help="If provided will use resynthesis."
@@ -130,8 +130,13 @@ def run_arguments(run_parser):
         type=str,
         help="""Path to a file to save the output in csv format.
         If the file doesn't exist, it will be created, if it exists it will be appended to.
+
         The output will be given as a single line with the following columns:
-            method, circuit, flag1, flag2, ...,  metric1, metric2, ...""",
+            method, circuit, resynthesis, error, max_iters, max_depth, one_tree_per_output, metric1, metric2, ...
+
+        - bool values are stored as "True" or "False".
+        - optional fields will just be left blank if not provided.
+        """,
     )
 
 
diff --git a/configuration.py b/configuration.py
index de6c69a..03fe0d0 100644
--- a/configuration.py
+++ b/configuration.py
@@ -89,6 +89,17 @@ class ApproxSynthesisConfig:
 
     show_progress : bool, default=False
         Whether to show simulation progress.
+
+    csv : str, optional
+        Path to a file to save the output in csv format.
+        If the file doesn't exist, it will be created with a header for the
+        columns; if it exists, it will be appended to.
+
+        The output will be given as a single line with the following columns:
+            method, circuit, resynthesis, error, max_iters, max_depth, one_tree_per_output, metric1, metric2, ...
+
+        - bool values are stored as "True" or "False".
+        - optional fields will just be left blank if not provided.
     """
 
     method: AlsMethod
@@ -101,6 +112,7 @@ class ApproxSynthesisConfig:
     max_depth: int | None
     one_tree_per_output: bool
     show_progress: bool
+    csv: str | None
 
     def __init__(
         self,
@@ -114,6 +126,7 @@ def __init__(
         max_depth: int | None = None,
         one_tree_per_output: bool = False,
         show_progress: bool = False,
+        csv: str | None = None,
     ):
         """
         Instantiate and validate an ApproxSynthesisConfig.
@@ -135,12 +148,58 @@ def __init__(
         self.max_depth = _validate_max_depth(max_depth, self.method)
         self.one_tree_per_output = one_tree_per_output
         self.show_progress = show_progress
+        self.csv = csv
 
     @override
     def __repr__(self):
         fields = ", ".join(f"{key}={value!r}" for key, value in self.__dict__.items())
         return f"{self.__class__.__name__}({fields})"
 
+    def csv_columns(self) -> list[str]:
+        """
+        Returns the names of the columns used if exporting this config's
+        execution to a CSV.
+        """
+        columns = [
+            "method",
+            "circuit",
+            "resynthesis",
+            "error",
+            "max_iters",
+            "max_depth",
+            "one_tree_per_output",
+        ]
+        for metric in self.metrics:
+            columns.append(metric.value)
+
+        return columns
+
+    def csv_values(self, results: dict[Metric, float]) -> list[str]:
+        """
+        Returns the values of the columns if exporting this config's execution to
+        a CSV row.
+        A results dict must be provided; it is assumed to contain a result
+        for every metric given to this config.
+        """
+        values = [
+            self.method.value,
+            self.circuit.topmodule,
+            self.resynthesis,
+            self.error,
+            self.max_iters,
+            self.max_depth,
+            self.one_tree_per_output,
+        ]
+
+        for metric in self.metrics:
+            values.append(results[metric])
+
+        stringified_values = [
+            str(value) if value is not None else "" for value in values
+        ]
+
+        return stringified_values
+
 
 def _validate_method(method: AlsMethod | str) -> AlsMethod:
     """

From 3916076d047ddaf50973220b9277e64142868869 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 8 May 2025 11:58:32 -0600
Subject: [PATCH 11/65] Clean up circuiterror, make sure compute_error returns
 float

---
 circuiterror.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/circuiterror.py b/circuiterror.py
index ea4b329..b8cb5b5 100644
--- a/circuiterror.py
+++ b/circuiterror.py
@@ -1,4 +1,3 @@
-from enum import Enum
 import numpy as np
 
 def extract_numbers(filename):
@@ -26,15 +25,17 @@ def extract_numbers(filename):
     return result
 
 
-def compute_error(metric, original, approximate):
+def compute_error(metric, original, approximate) -> float:
     '''
     Computes the error between two different testbench output files
 
+    Raises a ValueError if the metric is invalid
+
     Parameters
     ----------
     metric : string
         equation to measure the error
-        options med, wce, mred, msed
+        options hd, med, wce, mred, msed
     original : string
         path to the original results text file
     approximate : string
@@ -63,18 +64,14 @@ def compute_error(metric, original, approximate):
         0 if original_output[x] == 0 else error_distance[x]/original_output[x]
         for x in range(0,len(original_output))]
 
-    # Error Rate:
-    if (metric == "er"):
-        return round(sum((error>0 for error in error_distance))/total,3)
-
     # Mean Hamming Distance see: https://stackoverflow.com/questions/40875282/fastest-way-to-get-hamming-distance-for-integer-array
     if (metric == "hd"):
         hamming_distance=np.bitwise_xor(original_output,approximate_output)
         hamming_distance=[f'{hd:b}'.count('1') for hd in hamming_distance]
-        return round(np.mean(hamming_distance),3)
+        return round(float(np.mean(hamming_distance)),3)
 
     # Mean Error Distance MED := sum { ED(bj,b) * pj }
-    if (metric == "med"):
+    elif (metric == "med"):
         mean_error = sum(error_distance) / len(error_distance)
         return round(mean_error,3)
 
@@ -91,3 +88,6 @@ def compute_error(metric, original, approximate):
     elif (metric == "msed"):
         msed = sum(square_error_distance)/len(square_error_distance)
         return round(msed,3)
+
+    else:
+        raise ValueError(f"Invalid metric: {metric}")

From c1afa39add347df67ce4e26bfe0e351b5e067094 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 8 May 2025 11:59:19 -0600
Subject: [PATCH 12/65] Separate simulation and simulation + compute error

---
 circuit.py | 55 ++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 35 insertions(+), 20 deletions(-)

diff --git a/circuit.py b/circuit.py
index 8750900..2c9e523 100644
--- a/circuit.py
+++ b/circuit.py
@@ -522,7 +522,8 @@ def exact_output (self, testbench, output_file):
 
         return
 
-    def simulate_and_compute_error (self, testbench, metric, exact_output, new_output):
+
+    def simulate(self, testbench, approximate_output):
         '''
         Simulates the actual circuit tree (with deletions)
         Creates an executable using icarus, end then execute it to obtain the
@@ -532,26 +533,11 @@ def simulate_and_compute_error (self, testbench, metric, exact_output, new_outpu
         ----------
         testbench : string
             path to the testbench file
-        metric : string
-            equation to compute the error
-            options med, wce, wcre,mred, msed
-        exact_output : string
-            Path to the output file of the original exact circuit to compare
-            against. This file can be created with the `exact_output` method.
-        new_output : string
+        approximate_output : string
             Path to the output file where simulation results will be written.
             The user must provide the full file path and name. If the file
             exists, it will be overwritten.
-        clean : bool
-            if true, deletes all the generated files
-
-        Returns
-        -------
-        float
-            error of the current circuit tree
         '''
-
-
         name = get_name(5)
         rtl = self.write_to_disk(name)
 
@@ -573,13 +559,42 @@ def simulate_and_compute_error (self, testbench, metric, exact_output, new_outpu
         system(f"cd \"{out}\"; ./{top}")
         os.chdir(cwd)
 
-        rename(out + "/output.txt", new_output)
-
-        error = compute_error(metric, exact_output, new_output)
+        rename(out + "/output.txt", approximate_output)
 
         remove(rtl)
         remove(f"{out}/{top}")
 
+
+    def simulate_and_compute_error (self, testbench, exact_output, new_output, metric):
+        '''
+        Simulates the actual circuit tree (with deletions)
+        Creates an executable using icarus, and then execute it to obtain the
+        output of the testbench
+
+        Parameters
+        ----------
+        testbench : string
+            path to the testbench file
+        exact_output : string
+            Path to the output file of the original exact circuit to compare
+            against. This file can be created with the `exact_output` method.
+        new_output : string
+            Path to the output file where simulation results will be written.
+            The user must provide the full file path and name. If the file
+            exists, it will be overwritten.
+        metric : string
+            equation to compute the error
+            options med, wce, wcre,mred, msed
+
+        Returns
+        -------
+        float
+            error of the current circuit tree
+        '''
+        self.simulate(testbench, new_output)
+
+        error = compute_error(metric, exact_output, new_output)
+
         return error
 
     def generate_dataset(self, filename, samples, distribution='uniform', **kwargs):

From 19314440012350dfe5fdc32a92fcf288e377c7e0 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 8 May 2025 12:00:17 -0600
Subject: [PATCH 13/65] Add in write_tb option to specify a vcd dump file. Add
 a method to generate the SAIF data from a vcd dump for a circuit

---
 circuit.py | 286 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 284 insertions(+), 2 deletions(-)

diff --git a/circuit.py b/circuit.py
index 2c9e523..d47dbfa 100644
--- a/circuit.py
+++ b/circuit.py
@@ -1,5 +1,6 @@
 import os
 import re
+import datetime
 
 from graphviz import Digraph
 from os import path, remove, system, rename
@@ -665,7 +666,7 @@ def generate_dataset(self, filename, samples, distribution='uniform', **kwargs):
 
         return
 
-    def write_tb(self, filename, dataset_file, iterations=None, timescale= '10ns / 1ps', delay=10, format='h', dump_vcd=False, show_progress=True):
+    def write_tb(self, filename, dataset_file, iterations=None, timescale= '10ns / 1ps', delay=10, format='h', dump_vcd=None, show_progress=True):
         '''
         Writes a basic testbench for the circuit.
 
@@ -692,6 +693,9 @@ def write_tb(self, filename, dataset_file, iterations=None, timescale= '10ns / 1
                 'b' for binary
         show_progress: bool, default = True
             Whether the testbench should print its progress as it executes.
+        dump_vcd (optional): str
+            If provided, executing the testbench will create a vcd file at the
+            given path.
 
         Returns
         -------
@@ -774,7 +778,7 @@ def write_tb(self, filename, dataset_file, iterations=None, timescale= '10ns / 1
             text += '$display("-- Beginning Simulation --");\n\n'
 
         if dump_vcd:
-            text=f'{text} $dumpfile("./{self.topmodule}.vcd");\n' \
+            text=f'{text} $dumpfile("{dump_vcd}");\n' \
                  f' $dumpvars(0,{self.topmodule}_tb);\n'
 
         relative_dataset_path = os.path.relpath(dataset_file, start=os.path.dirname(filename))
@@ -817,6 +821,284 @@ def write_tb(self, filename, dataset_file, iterations=None, timescale= '10ns / 1
 
         return
 
+    def generate_saif_from_vcd(
+        self, saif: str, vcd_file_path: str, verbose: bool = False
+    ) -> None:
+        """
+        Generates a SAIF file from a vcd file. A vcd file can be created by
+        running a simulation with a testbench that was created by
+        `write_tb` with a `dump_vcd` parameter.
+
+        The SAIF file is then parsed and the netlist annotated with execution
+        data.
+
+        Parameters
+        ----------
+        saif: string
+            Path to the saif file generated (overwritten if it exists).
+        vcd_file_path: string
+            Path to the vcd file.
+            The user must provide the full file path and name. The file is
+            only read by this method; it is never modified.
+        verbose: bool
+            Whether to print verbose output
+        """
+        saifversion = "2.0"
+        direction = "backward"
+        design = self.topmodule
+        vendor = "AxPy Inc"
+        program_name = "open_vcd2saif"
+        version = "v0"
+        divider = "/ "
+        timescale = "1 ps"
+
+        # 1st pass: get variables
+        var_list = []
+        level = 0
+
+        count = 0
+        total = 0
+
+        def file_read(filename):
+            for row in open(filename, "r"):
+                yield row.split("\n")[0]
+
+        vcd_file = file_read(vcd_file_path)
+
+        for line in vcd_file:
+            search = re.search(r"\$scope", line)
+            if search is not None:
+                ls = line.split()
+                parent = ls[2]
+                level += 1
+                continue
+
+            search = re.search(r"\$upscope", line)
+            if search is not None:
+                level -= 1
+                continue
+
+            search = re.search(r"\$var", line)
+            if search is not None:
+                ls = line.split()
+                name = ls[4]
+                alias = ls[3]
+                var_len = int(ls[2])
+                m = re.findall(r"\d+", ls[5])
+                flag_mult = 0
+                if len(m) == 2:
+                    n0 = int(m[1])
+                    flag_mult = 1
+                elif len(m) == 1:
+                    n0 = int(m[0])
+                    flag_mult = 1
+                else:
+                    n0 = 1
+
+                if var_len == 1:
+                    var_list.append(
+                        {
+                            "name": name,
+                            "alias": alias,
+                            "parent": parent,
+                            "level": level,
+                            "len": 1,
+                            "bit_index": n0,
+                            "multi_bit": flag_mult,
+                            "high": 0,
+                            "low": 0,
+                            "x": 0,
+                            "ig": 0,
+                            "last": "2",
+                            "toggle": 0,
+                        }
+                    )
+                else:
+                    for i in range(var_len):
+                        var_list.append(
+                            {
+                                "name": name,
+                                "alias": alias,
+                                "parent": parent,
+                                "level": level,
+                                "len": var_len,
+                                "bit_index": i,
+                                "multi_bit": flag_mult,
+                                "high": 0,
+                                "low": 0,
+                                "x": 0,
+                                "ig": 0,
+                                "last": "2",
+                                "toggle": 0,
+                            }
+                        )
+                continue
+            if verbose:
+                count += 1
+                print(f"Pass #1: {count}/{total}")
+
+        # 2nd pass: get values
+        time_step = 0
+        last_step = 0
+
+        count = 0
+        vcd_file = file_read(vcd_file_path)
+
+        for line in vcd_file:
+            if line != "":
+                if line[0] == "#":
+                    time_step = int(line[1:])
+
+                    # print('Time step: %d' % time_step)
+                    time_diff = time_step - last_step
+                    for var in var_list:
+                        if var["last"] == "1":
+                            var["high"] += time_diff
+                        elif var["last"] == "0":
+                            var["low"] += time_diff
+                        elif var["last"] == "x":
+                            var["x"] += time_diff
+                    last_step = time_step
+
+                elif line[0] == "b" and line[1] != "x":
+                    val, alias = line.split()
+                    val_len = len(val[1:])
+
+                    bit_index = val_len - 1
+                    for bit_char in val[1:]:
+                        bit_val = bit_char
+                        for var in var_list:
+                            if alias == var["alias"]:
+                                templateSize = "{0:0%db}" % (var["len"])
+                                word = templateSize.format(int(val[1:], 2))
+                                rev_word = word[::-1]
+                                if (
+                                    var["last"] != "2"
+                                    and var["last"] != rev_word[var["bit_index"]]
+                                ):
+                                    var["toggle"] += 1
+                                var["last"] = rev_word[var["bit_index"]]
+
+                        bit_index -= 1
+
+                elif line[0] == "0" or line[0] == "1" or line[0] == "x":
+                    bit_val = line[0]
+                    alias = line[1:]
+                    for var in var_list:
+                        if alias == var["alias"] and var["len"] == 1:
+                            if var["last"] != "2" and var["last"] != bit_val:
+                                var["toggle"] += 1
+                            var["last"] = bit_val
+            if verbose:
+                count += 1
+                print(f"Pass #2: {count}/{total}")
+
+        duration = time_step - 1
+        # 3rd pass: write file
+
+        text_level = 0
+        level = 0
+
+        count = 0
+        vcd_file = file_read(vcd_file_path)
+
+        def get_time_stamp():
+            now = datetime.datetime.now()
+            year = '{:02d}'.format(now.year)
+            month = '{:02d}'.format(now.month)
+            day = '{:02d}'.format(now.day)
+            hour = '{:02d}'.format(now.hour)
+            minute = '{:02d}'.format(now.minute)
+            second = '{:02d}'.format(now.second)
+            date_string = '{}-{}-{} {}:{}:{}'.format(month, day, year, hour, minute, second)
+            return date_string
+
+        saifile = open(saif, "w")
+
+        saifile.write("(SAIFILE\n")
+        saifile.write('(SAIFVERSION "%s")\n' % saifversion)
+        saifile.write('(DIRECTION "%s")\n' % direction)
+        saifile.write('(DESIGN "%s")\n' % design)
+        saifile.write('(DATE "%s")\n' % get_time_stamp())
+        saifile.write('(VENDOR "%s")\n' % vendor)
+        saifile.write('(PROGRAM_NAME "%s")\n' % program_name)
+        saifile.write('(VERSION "%s")\n' % version)
+        saifile.write("(DIVIDER %s)\n" % divider)
+        saifile.write("(TIMESCALE %s)\n" % timescale)
+        saifile.write("(DURATION %ld)\n" % duration)
+
+        def saif_indent_level(level):
+            space = ''
+            for _ in range(level):
+                space += '  '
+            return space
+
+        for line in vcd_file:
+            search = re.search(r"\$scope", line)
+            if search is not None:
+                ls = line.split()
+                name = ls[2]
+                saifile.write(
+                    "%s(INSTANCE %s\n" % (saif_indent_level(text_level), name)
+                )
+                text_level += 1
+                level += 1
+                saifile.write("%s(NET\n" % (saif_indent_level(text_level)))
+                text_level += 1
+
+                # put variables
+                for var in var_list:
+                    if var["parent"] == name and var["level"] == level:
+                        if var["multi_bit"] == 0:
+                            saifile.write(
+                                "%s(%s\n" % (saif_indent_level(text_level), var["name"])
+                            )
+                        else:
+                            saifile.write(
+                                "%s(%s\\[%d\\]\n"
+                                % (
+                                    saif_indent_level(text_level),
+                                    var["name"],
+                                    var["bit_index"],
+                                )
+                            )
+
+                        saifile.write(
+                            "%s  (T0 %d) (T1 %d) (TX %d)\n"
+                            % (
+                                saif_indent_level(text_level),
+                                var["low"],
+                                var["high"],
+                                var["x"],
+                            )
+                        )
+
+                        saifile.write(
+                            "%s  (TC %d) (IG %d)\n"
+                            % (saif_indent_level(text_level), var["toggle"], var["ig"])
+                        )
+
+                        saifile.write("%s)\n" % (saif_indent_level(text_level)))
+
+                text_level -= 1
+                saifile.write("%s)\n" % (saif_indent_level(text_level)))
+                continue
+
+            search = re.search(r"\$upscope", line)
+            if search is not None:
+                text_level -= 1
+                level -= 1
+                saifile.write("%s)\n" % (saif_indent_level(text_level)))
+
+            if verbose:
+                count += 1
+                print(f"Pass #3: {count}/{total}")
+
+        saifile.write(")\n")
+        saifile.close()
+
+        self.saif_parser(saif)
+
     def resynth(self):
         '''
         Calls resynthesis function to reduce circuit structure using logic synthesis optimizations/mapping

From ed59bc689c1d0949231bfb875a6fe8d57fb56c87 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 8 May 2025 13:09:08 -0600
Subject: [PATCH 14/65] Make write_to_disk take in a full path instead of a
 weird filename that has stuff prepended and appended

---
 circuit.py | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/circuit.py b/circuit.py
index d47dbfa..3cd38ac 100644
--- a/circuit.py
+++ b/circuit.py
@@ -275,15 +275,15 @@ def get_wires_to_be_deleted(self):
 
 
 
-    def write_to_disk (self, filename=""):
+    def write_to_disk (self, filepath):
         '''
         Write the xml circuit into a netlist file considering the nodes to be
         deleted (marked with an attribute delete='yes')
 
-        Returns
-        -------
-        string
-            path of the recently created netlist
+        Parameters
+        ----------
+        filepath: string
+            Full file path to the generated file.
         '''
 
         def format_io(node, io):
@@ -293,9 +293,6 @@ def format_io(node, io):
         nodes_to_delete = self.get_nodes_to_delete()
         to_be_deleted, to_be_assigned = self.get_wires_to_be_deleted()
 
-        filename = filename if filename != "" else str(randint(9999,999999))
-        filepath = f"{self.output_folder}{path.sep}{filename}.v"
-
         with open(filepath, 'w') as netlist_file:
 
             def writeln(file, text):
@@ -340,7 +337,6 @@ def writeln(file, text):
                 writeln(netlist_file, assign)
 
             writeln(netlist_file, "endmodule")
-        return filepath
 
 
     def show (self, filename=None, show_deletes=False, view=True, format="png"):
@@ -494,8 +490,8 @@ def exact_output (self, testbench, output_file):
         '''
 
 
-        name = get_name(5)
-        rtl = self.write_to_disk(name)
+        rtl = f"{self.output_folder}/{get_name(5)}.v"
+        self.write_to_disk(rtl)
 
         top = self.topmodule
         current_dir=os.path.dirname(__file__)
@@ -539,8 +535,8 @@ def simulate(self, testbench, approximate_output):
             The user must provide the full file path and name. If the file
             exists, it will be overwritten.
         '''
-        name = get_name(5)
-        rtl = self.write_to_disk(name)
+        rtl = f"{self.output_folder}/{get_name(5)}.v"
+        self.write_to_disk(rtl)
 
         top = self.topmodule
         tech = "./templates/" + self.tech_file
@@ -666,7 +662,7 @@ def generate_dataset(self, filename, samples, distribution='uniform', **kwargs):
 
         return
 
-    def write_tb(self, filename, dataset_file, iterations=None, timescale= '10ns / 1ps', delay=10, format='h', dump_vcd=None, show_progress=True):
+    def write_tb(self, filename, dataset_file, iterations=None, timescale= '10ns / 1ps', delay=10, format='h', dump_vcd=None, show_progress=False):
         '''
         Writes a basic testbench for the circuit.
 
@@ -1106,8 +1102,10 @@ def resynth(self):
         :return: path-like string
             path to resynthetized file
         '''
-        name=get_name(5)
-        self.netl_file =resynthesis(self.write_to_disk(name),self.tech_file,self.topmodule)
+        rtl = f"{self.output_folder}/{get_name(5)}.v"
+        self.write_to_disk(rtl)
+
+        self.netl_file =resynthesis(rtl,self.tech_file,self.topmodule)
 
         netlist = Netlist(self.netl_file, self.technology)
         self.netl_root = netlist.root
@@ -1117,7 +1115,7 @@ def resynth(self):
         self.raw_outputs = netlist.raw_outputs
         self.raw_parameters = netlist.raw_parameters
 
-        os.remove(f'{self.output_folder}/{name}.v')
+        os.remove(rtl)
 
         return self.netl_file
 
@@ -1131,9 +1129,10 @@ def get_area(self, method = 'yosys'):
         '''
 
         if method == 'yosys':
-            name=get_name(5)
-            area=ys_get_area(self.write_to_disk(name),self.tech_file,self.topmodule)
-            os.remove(f'{self.output_folder}/{name}.v')
+            rtl = f"{self.output_folder}/{get_name(5)}.v"
+            self.write_to_disk(rtl)
+            area=ys_get_area(rtl,self.tech_file,self.topmodule)
+            os.remove(rtl)
 
             return area
         else:

From e4df79d749e7923bbf83833c58f6497e28160882 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 8 May 2025 14:01:12 -0600
Subject: [PATCH 15/65] Make simulations execute from the testbench file dir

---
 circuit.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/circuit.py b/circuit.py
index 3cd38ac..13d29b1 100644
--- a/circuit.py
+++ b/circuit.py
@@ -496,7 +496,10 @@ def exact_output (self, testbench, output_file):
         top = self.topmodule
         current_dir=os.path.dirname(__file__)
         tech = f"{current_dir}/templates/" + self.tech_file
-        out = self.output_folder
+
+        # Executable is ran from the testbench folder, because the path to the
+        # dataset is relative to the testbench file.
+        out = os.path.dirname(testbench)
 
         """Better to temporarily change cwd when executing iverilog"""
         cwd=os.getcwd()
@@ -540,7 +543,10 @@ def simulate(self, testbench, approximate_output):
 
         top = self.topmodule
         tech = "./templates/" + self.tech_file
-        out = self.output_folder
+
+        # Executable is ran from the testbench folder, because the path to the
+        # dataset is relative to the testbench file.
+        out = os.path.dirname(testbench)
 
         """Better to temporarily change cwd when executing iverilog"""
         cwd=os.getcwd()
@@ -662,7 +668,7 @@ def generate_dataset(self, filename, samples, distribution='uniform', **kwargs):
 
         return
 
-    def write_tb(self, filename, dataset_file, iterations=None, timescale= '10ns / 1ps', delay=10, format='h', dump_vcd=None, show_progress=False):
+    def write_tb(self, filename, dataset_file, iterations=None, timescale= '10ns / 1ps', delay=10, format='h', dump_vcd=None, show_progress=True):
         '''
         Writes a basic testbench for the circuit.
 

From 4367471d2e668d2542750cc9f19243cf156cf1df Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 8 May 2025 14:43:48 -0600
Subject: [PATCH 16/65] Allow specifying a different topmodule for a circuit

---
 circuit.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/circuit.py b/circuit.py
index 13d29b1..8dfad15 100644
--- a/circuit.py
+++ b/circuit.py
@@ -38,7 +38,7 @@ class Circuit:
     '''
 
 
-    def __init__(self, rtl, tech, saif = ""):
+    def __init__(self, rtl, tech, saif = "", topmodule = None):
         '''
         Parse a rtl circuit into a xml tree using a specific technology library
 
@@ -50,12 +50,19 @@ def __init__(self, rtl, tech, saif = ""):
             path to the technology file
         saif : string
             path to the saif file
+        topmodule : string (optional)
+            name of the circuit module that we want to synthesize, if not
+            provided it will be inferred from the rtl filename
         '''
 
 
         self.rtl_file = rtl
         self.tech_file = tech
-        self.topmodule = rtl.split('/')[-1].replace(".v","")
+
+        if not topmodule:
+            topmodule = rtl.split('/')[-1].replace(".v","")
+
+        self.topmodule = topmodule
         self.netl_file = synthesis (rtl, self.tech_file, self.topmodule)
         self.technology = Technology(self.tech_file)
         # extract the usefull attributes of netlist

From 53511c5a832970ab434b6b0c606f0b71266f1cd1 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 8 May 2025 14:50:30 -0600
Subject: [PATCH 17/65] Add the runner module and the AREA metric

---
 __main__.py      |   8 ++
 configuration.py |  21 +++++
 runner.py        | 229 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 258 insertions(+)
 create mode 100644 runner.py

diff --git a/__main__.py b/__main__.py
index 11c01bd..56b1f30 100644
--- a/__main__.py
+++ b/__main__.py
@@ -1,6 +1,7 @@
 import argparse
 from circuit import Circuit
 from configuration import ApproxSynthesisConfig, AlsMethod, Metric
+from runner import run
 
 # The tech library is hardcoded for the following reasons:
 # - Ease of use: This way users don't have to provide a tech library which most
@@ -77,6 +78,13 @@ def main():
         print("Configuration loaded successfully")
         print(config)
 
+        results = run(config)
+
+        print("\n---- Results -----")
+        for metric in config.metrics:
+            value = results[metric]
+            print(f"{metric.value}: {metric.to_user_friendly_display(value)}")
+
     elif args.subcommand == "generate":
         generate_dataset(args)
 
diff --git a/configuration.py b/configuration.py
index 03fe0d0..cbca0e6 100644
--- a/configuration.py
+++ b/configuration.py
@@ -30,6 +30,25 @@ class Metric(str, Enum):
     MEAN_RELATIVE_ERROR_DISTANCE = "mred"
     MEAN_SQUARED_ERROR_DISTANCE = "msed"
     ALS_TIME = "time"
+    AREA = "area"
+
+
+    def to_user_friendly_display(self, value: float) -> str:
+        """
+        Formats the value to a user friendly string format for display.
+        For example, the AREA metric is a percentage value, so it's formatted as
+        such.
+        """
+        match self:
+            # Percentage metrics
+            case Metric.MEAN_RELATIVE_ERROR_DISTANCE | Metric.AREA:
+                return f"{round(value*100, 2)}%"
+            case Metric.ALS_TIME:
+                return f"{round(value, 2)} s"
+            # No special handling except rounding
+            case _:
+                return str(round(value, 2))
+
 
 
 # List of iterative methods.
@@ -65,6 +84,8 @@ class ApproxSynthesisConfig:
 
     metrics : list[Metric | str]
         Metrics to calculate for the execution.
+        The time metric is given in seconds, the area metric is given as a % of
+        the area of the original circuit.
 
     resynthesis : bool, default=False
         Whether to use resynthesis.
diff --git a/runner.py b/runner.py
new file mode 100644
index 0000000..a130815
--- /dev/null
+++ b/runner.py
@@ -0,0 +1,229 @@
+from collections.abc import Callable
+import csv
+import os
+import time
+from circuit import Circuit
+from circuiterror import compute_error
+from ml_algorithms.decision_tree import DecisionTreeCircuit
+from utils import read_dataset
+from configuration import AlsMethod, ApproxSynthesisConfig, Metric
+
+type Results = dict[Metric, float]
+
+# Directory to output build files to.
+# TODO: Consider making this a config parameter.
+BUILD_DIR = "build"
+
+# Files generated by the different methods. Defined here to just reuse in the
+# code.
+APPROX_RTL = f"{BUILD_DIR}/.approx.v"
+
+RESYNTH_RTL = f"{BUILD_DIR}/.resynth.v"
+
+EXACT_OUTPUT = f"{BUILD_DIR}/.exact_output"
+APPROX_OUTPUT = f"{BUILD_DIR}/.approx_output"
+RESYNTH_OUTPUT = f"{BUILD_DIR}/.resynth_output"
+
+TB = f"{BUILD_DIR}/.tb.v"
+
+VCD = f"{BUILD_DIR}/.vcd"
+SAIF = f"{BUILD_DIR}/.saif"
+
+# This list should contain all the Metrics that are related to approximation
+# errors.
+_APPROXIMATION_ERROR_METRICS: list[Metric] = [
+    Metric.HAMMING_DISTANCE,
+    Metric.MEAN_ERROR_DISTANCE,
+    Metric.WORST_CASE_ERROR,
+    Metric.MEAN_RELATIVE_ERROR_DISTANCE,
+    Metric.MEAN_SQUARED_ERROR_DISTANCE,
+]
+
+
+def run(config: ApproxSynthesisConfig) -> Results:
+    """
+    Runner function for an execution specified by a valid ApproxSynthesisConfig.
+    This function will do the following steps:
+    - Simulate the exact circuit
+    - Carry out the given ALS method
+    - Calculate the metrics given in config.metrics and return them as a Results
+      dict.
+    """
+    if not os.path.exists(BUILD_DIR):
+        os.makedirs(BUILD_DIR)
+
+    config.circuit.write_tb(TB, config.dataset, show_progress=config.show_progress)
+    config.circuit.exact_output(TB, EXACT_OUTPUT)
+
+    # The benchmark functions should return the final approximated circuit and
+    # also carry out the final simulation to generate the APPROX_OUTPUT that
+    # will be used to calculate error metrics.
+    benchmark_fn: Callable[[ApproxSynthesisConfig], Circuit]
+    match config.method:
+        case AlsMethod.CONSTANT_INPUTS:
+            benchmark_fn = _run_constant_inputs
+        case AlsMethod.CONSTANT_OUTPUTS:
+            benchmark_fn = _run_constant_outputs
+        case AlsMethod.PROBRUN:
+            _create_saif(config)
+            benchmark_fn = _run_probrun
+        case AlsMethod.SIGNIFICANCE:
+            benchmark_fn = _run_significance
+        case AlsMethod.CCARVING:
+            benchmark_fn = _run_ccarving
+        case AlsMethod.DECISION_TREE:
+            benchmark_fn = _run_decision_tree
+
+    # Timed code includes:
+    # - Execution of ALS method.
+    # - Simulation of approximated circuit.
+    # - Calculation of all metrics.
+    start_time = time.perf_counter()
+
+    original_area = float(config.circuit.get_area())
+
+    approx_circuit = benchmark_fn(config)
+    results = _compute_error_metrics(config)
+
+    if Metric.AREA in config.metrics:
+        approx_area = float(approx_circuit.get_area())
+        results[Metric.AREA] = approx_area / original_area
+
+    end_time = time.perf_counter()
+
+    if Metric.ALS_TIME in config.metrics:
+        elapsed_time = end_time - start_time
+        results[Metric.ALS_TIME] = elapsed_time
+
+    if config.csv:
+        _write_results_to_csv(config, results)
+
+    return results
+
+
+def _create_saif(config: ApproxSynthesisConfig):
+    """
+    Create a SAIF file and annotate the circuit with its data
+    """
+
+    VCD_TB = f"{BUILD_DIR}/.vcd_tb.v"
+
+    config.circuit.write_tb(
+        VCD_TB,
+        config.dataset,
+        dump_vcd=VCD,
+        show_progress=config.show_progress,
+    )
+    config.circuit.exact_output(VCD_TB, EXACT_OUTPUT)
+    config.circuit.generate_saif_from_vcd(SAIF, VCD)
+
+
+def _write_results_to_csv(config: ApproxSynthesisConfig, results: Results):
+    """
+    Writes the execution results to a CSV file as a single row.
+    """
+    assert config.csv is not None, (
+        "_write_results_to_csv should only be called if a csv file was given by the user"
+    )
+
+    file_exists = os.path.isfile(config.csv)
+
+    with open(config.csv, mode="a", newline="") as file:
+        writer = csv.writer(file)
+        # If the file does not exist, write the header
+        if not file_exists:
+            writer.writerow(config.csv_columns())
+        writer.writerow(config.csv_values(results))
+
+
+def _run_decision_tree(config: ApproxSynthesisConfig) -> Circuit:
+    exact_circuit = config.circuit
+    inputs = read_dataset(config.dataset, 16)
+    outputs = read_dataset(EXACT_OUTPUT, 10)
+
+    tree = DecisionTreeCircuit(
+        exact_circuit.inputs,
+        exact_circuit.outputs,
+        one_tree_per_output=config.one_tree_per_output,
+        max_depth=config.max_depth,
+    )
+
+    tree.train(inputs, outputs)
+    tree.to_verilog_file(exact_circuit.topmodule, APPROX_RTL)
+    tree_circuit = Circuit(
+        APPROX_RTL, exact_circuit.tech_file, topmodule=exact_circuit.topmodule
+    )
+
+    if not config.resynthesis:
+        tree_circuit.simulate(TB, APPROX_OUTPUT)
+
+    else:
+        tree_circuit.resynth()
+        tree_circuit.simulate(TB, APPROX_OUTPUT)
+        error = compute_error(
+            Metric.MEAN_RELATIVE_ERROR_DISTANCE, EXACT_OUTPUT, APPROX_OUTPUT
+        )
+
+        assert isinstance(config.error, float), (
+            "'error' should be given when executing decision tree with resynthesis"
+        )
+
+        if error > config.error:
+            return tree_circuit
+
+        assert isinstance(config.max_iters, int), (
+            "'max_iters' should be given when executing decision tree with resynthesis"
+        )
+
+        for _ in range(config.max_iters):
+            last_output = read_dataset(APPROX_OUTPUT, 10)
+
+            tree.train(inputs, last_output)
+            tree.to_verilog_file(exact_circuit.topmodule, RESYNTH_RTL)
+
+            resynth_circuit = Circuit(
+                RESYNTH_RTL, exact_circuit.tech_file, topmodule=exact_circuit.topmodule
+            )
+            resynth_circuit.simulate(TB, RESYNTH_OUTPUT)
+
+            error = compute_error(
+                Metric.MEAN_RELATIVE_ERROR_DISTANCE, EXACT_OUTPUT, RESYNTH_OUTPUT
+            )
+
+            if error > config.error:
+                return tree_circuit
+            else:
+                os.replace(RESYNTH_OUTPUT, APPROX_OUTPUT)
+                tree_circuit = resynth_circuit
+
+    return tree_circuit
+
+
+def _run_constant_inputs(config: ApproxSynthesisConfig) -> Circuit:
+    return config.circuit  # TODO Implement method
+
+
+def _run_constant_outputs(config: ApproxSynthesisConfig) -> Circuit:
+    return config.circuit  # TODO Implement method
+
+
+def _run_probrun(config: ApproxSynthesisConfig) -> Circuit:
+    return config.circuit  # TODO Implement method
+
+
+def _run_significance(config: ApproxSynthesisConfig) -> Circuit:
+    return config.circuit  # TODO Implement method
+
+
+def _run_ccarving(config: ApproxSynthesisConfig) -> Circuit:
+    return config.circuit  # TODO Implement method
+
+
+def _compute_error_metrics(config: ApproxSynthesisConfig) -> Results:
+    result: Results = {}
+    for metric in config.metrics:
+        if metric in _APPROXIMATION_ERROR_METRICS:
+            error = compute_error(metric.value, EXACT_OUTPUT, APPROX_OUTPUT)
+            result[metric] = error
+
+    return result

From b179907c7262e954a028c274760df5db2fc7db57 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 8 May 2025 14:50:47 -0600
Subject: [PATCH 18/65] Fix misc bugs: input bit range in generated Verilog and
 empty lines in read_dataset

---
 ml_algorithms/decision_tree.py | 2 +-
 utils.py                       | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/ml_algorithms/decision_tree.py b/ml_algorithms/decision_tree.py
index e48062c..33a0dde 100644
--- a/ml_algorithms/decision_tree.py
+++ b/ml_algorithms/decision_tree.py
@@ -120,7 +120,7 @@ def to_verilog_file(self, topmodule: str, output_file: str):
         raw_inputs = [
             f"input {variable.name};"
             if variable.bits == 1
-            else f"input [{variable.bits}:0] {variable.name};"
+            else f"input [{variable.bits - 1}:0] {variable.name};"
             for variable in self.inputs
         ]
         raw_outputs = [
diff --git a/utils.py b/utils.py
index 1154106..09b145f 100644
--- a/utils.py
+++ b/utils.py
@@ -107,9 +107,12 @@ def read_dataset(filename: str, base: int, max_lines: None | int =None) -> List[
     """
     with open(filename, "r") as f:
         if max_lines is not None:
-            return [
+            values = [
                 [int(x, base) for x in line.split()]
                 for _, line in zip(range(max_lines), f)
             ]
         else:
-            return [[int(x, base) for x in line.split()] for line in f]
+            values = [[int(x, base) for x in line.split()] for line in f]
+
+        # Filter out empty lines
+        return [value for value in values if value]

From a3f664f63b9c2e8d49fa7f13590e81378337172e Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 8 May 2025 20:09:29 -0600
Subject: [PATCH 19/65] Add CLI check for circuit file existing

---
 __main__.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/__main__.py b/__main__.py
index 56b1f30..b668c21 100644
--- a/__main__.py
+++ b/__main__.py
@@ -1,4 +1,5 @@
 import argparse
+import os
 from circuit import Circuit
 from configuration import ApproxSynthesisConfig, AlsMethod, Metric
 from runner import run
@@ -57,6 +58,9 @@ def main():
         if len(metrics) == 0:
             metrics = [Metric.MEAN_RELATIVE_ERROR_DISTANCE, Metric.ALS_TIME]
 
+        if not os.path.isfile(args.circuit):
+            parser.error(f"The path given for the circuit '{args.circuit}' does not exist.")
+
         try:
             circuit = Circuit(args.circuit, TECH)
             config = ApproxSynthesisConfig(

From ccce88c543f665847698ebe4df569ddfb0202371 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Fri, 9 May 2025 14:45:40 -0600
Subject: [PATCH 20/65] Improve --show-progress flag docs

---
 __main__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/__main__.py b/__main__.py
index b668c21..f1f728f 100644
--- a/__main__.py
+++ b/__main__.py
@@ -135,7 +135,7 @@ def run_arguments(run_parser):
         help="Use one tree per output for decision_tree",
     )
     run_parser.add_argument(
-        "--show-progress", action="store_true", help="Show simulation progress"
+        "--show-progress", action="store_true", help="Show the progress of simulations executed for the ALS."
     )
     run_parser.add_argument(
         "--csv",

From 96c522bceb393b786b031d06cee971e855517e14 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Fri, 9 May 2025 14:46:13 -0600
Subject: [PATCH 21/65] Rename variables in write_to_disk to be clearer and
 correct

---
 circuit.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/circuit.py b/circuit.py
index 8dfad15..0a5702b 100644
--- a/circuit.py
+++ b/circuit.py
@@ -315,15 +315,15 @@ def writeln(file, text):
             for wire in self.get_circuit_wires():
                 if wire not in to_be_deleted:
                     writeln(netlist_file, f"\twire {wire};")
-            used_outputs=[]
+            used_ports=[]
             for output in self.raw_outputs:
-                if output not in used_outputs:
+                if output not in used_ports:
                     writeln(netlist_file, "\t" + output)
-                    used_outputs.append(output)
-            for output in self.raw_inputs:
-                if output not in used_outputs:
-                    writeln(netlist_file, "\t" + output)
-                    used_outputs.append(output)
+                    used_ports.append(output)
+            for input in self.raw_inputs:
+                if input not in used_ports:
+                    writeln(netlist_file, "\t" + input)
+                    used_ports.append(input)
 
             for node_var in self.get_circuit_nodes():
                 if node_var not in nodes_to_delete:

From aeb4875406ef3c79316a6efe666d847d88adfbcf Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Fri, 9 May 2025 14:46:46 -0600
Subject: [PATCH 22/65] Fix decision tree method bug: use separate testbenches
 for the exact and approximated circuits

---
 runner.py | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/runner.py b/runner.py
index a130815..a10ed43 100644
--- a/runner.py
+++ b/runner.py
@@ -24,7 +24,8 @@
 APPROX_OUTPUT = f"{BUILD_DIR}/.approx_output"
 RESYNTH_OUTPUT = f"{BUILD_DIR}/.resynth_output"
 
-TB = f"{BUILD_DIR}/.tb.v"
+EXACT_TB = f"{BUILD_DIR}/.exact_tb.v"
+APPROX_TB = f"{BUILD_DIR}/.approx_tb.v"
 
 VCD = f"{BUILD_DIR}/.vcd"
 SAIF = f"{BUILD_DIR}/.saif"
@@ -52,8 +53,10 @@ def run(config: ApproxSynthesisConfig) -> Results:
     if not os.path.exists(BUILD_DIR):
         os.makedirs(BUILD_DIR)
 
-    config.circuit.write_tb(TB, config.dataset, show_progress=config.show_progress)
-    config.circuit.exact_output(TB, EXACT_OUTPUT)
+    config.circuit.write_tb(
+        EXACT_TB, config.dataset, show_progress=config.show_progress
+    )
+    config.circuit.exact_output(EXACT_TB, EXACT_OUTPUT)
 
     # The benchmark functions should return the final approximated circuit and
     # also carry out the final simulation to generate the APPROX_OUTPUT that
@@ -154,12 +157,18 @@ def _run_decision_tree(config: ApproxSynthesisConfig) -> Circuit:
         APPROX_RTL, exact_circuit.tech_file, topmodule=exact_circuit.topmodule
     )
 
+    tree_circuit.write_tb(
+        APPROX_TB,
+        config.dataset,
+        show_progress=config.show_progress,
+    )
+
     if not config.resynthesis:
-        tree_circuit.simulate(TB, APPROX_OUTPUT)
+        tree_circuit.simulate(APPROX_TB, APPROX_OUTPUT)
 
     else:
         tree_circuit.resynth()
-        tree_circuit.simulate(TB, APPROX_OUTPUT)
+        tree_circuit.simulate(APPROX_TB, APPROX_OUTPUT)
         error = compute_error(
             Metric.MEAN_RELATIVE_ERROR_DISTANCE, EXACT_OUTPUT, APPROX_OUTPUT
         )
@@ -184,7 +193,7 @@ def _run_decision_tree(config: ApproxSynthesisConfig) -> Circuit:
             resynth_circuit = Circuit(
                 RESYNTH_RTL, exact_circuit.tech_file, topmodule=exact_circuit.topmodule
             )
-            resynth_circuit.simulate(TB, RESYNTH_OUTPUT)
+            resynth_circuit.simulate(APPROX_TB, RESYNTH_OUTPUT)
 
             error = compute_error(
                 Metric.MEAN_RELATIVE_ERROR_DISTANCE, EXACT_OUTPUT, RESYNTH_OUTPUT

From 66e40e631de62233cd6230e17a9590ccc621aee1 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Sat, 10 May 2025 17:52:47 -0600
Subject: [PATCH 23/65] Fix testing of circuits with multiple outputs: the tree
 was trained with the outputs in a different order, and the testbench printed
 the outputs in reverse order

---
 circuit.py |   8 ++--
 netlist.py | 136 ++++++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 118 insertions(+), 26 deletions(-)

diff --git a/circuit.py b/circuit.py
index 0a5702b..011877c 100644
--- a/circuit.py
+++ b/circuit.py
@@ -808,11 +808,11 @@ def write_tb(self, filename, dataset_file, iterations=None, timescale= '10ns / 1
              f'  #{delay}\n' \
              f'  $fwrite(file, "'
         for o in range(len(outputs_info.keys())):
-            text=f'{text}%d\\n '
-        text=f'{text}",'
-        for o in list(outputs_info.keys())[::-1][0:-1]:
+            text=f'{text}%d '
+        text=f'{text}\\n",'
+        for o in list(outputs_info.keys())[0:-1]:
             text= f'{text}{o},'
-        text= f'{text}{list(outputs_info.keys())[0]});\n'
+        text= f'{text}{list(outputs_info.keys())[-1]});\n'
 
         if show_progress:
             text +=f'  $display("-- Progress: %d/{iterations} --",i+1);\n'
diff --git a/netlist.py b/netlist.py
index f7f4c55..0043f7c 100644
--- a/netlist.py
+++ b/netlist.py
@@ -52,13 +52,13 @@ def __init__(self, netl_file, technology):
         with open(netl_file, 'r') as circuit_file:
             content = circuit_file.read()
 
-        self.raw_outputs, self.circuit_outputs = self.get_outputs(content)
-        self.raw_inputs, self.circuit_inputs = self.get_inputs(content)
-
         expreg = r'module [a-zA-Z0-9_]*\s*\(([\s\S]+?)\);'
         parameters = re.search(expreg,content)
         self.raw_parameters = re.sub('\n','',parameters.group(1))
 
+        self.raw_outputs, self.circuit_outputs = self.get_outputs(content, self.raw_parameters)
+        self.raw_inputs, self.circuit_inputs = self.get_inputs(content, self.raw_parameters)
+
         assigns = parse_assigns(content)
         for a in assigns:
             self.assignments.append(a)
@@ -156,24 +156,31 @@ def to_xml(self):
         return root
 
 
-    def get_inputs(self, netlist_rtl):
+    def get_inputs(self, netlist_rtl, raw_parameters):
         '''
-        Extracts the input variables of the circuit
-        TODO: support one bit variables
+        Extracts the circuit's input variables.
 
-        The `circuit_inputs` will be returned from MSB -> LSB. This is important
-        to provide the inputs in the correct order to methods that map a circuit
-        representation to a Verilog format, like the Decision Tree method.
+        Inputs are returned in two ways:
+        - `circuit_inputs`: expanded, sorted MSB→LSB, and follow the order in `raw_parameters`.
+        - `raw_inputs`: unexpanded lines, sorted to match the same order.
+
+        This ordering ensures compatibility with Verilog-generating methods like
+        the Decision Tree, which expect to receive bit-accurate and positionally
+        correct inputs in order to replicate them.
+
+        TODO: support one bit variables
 
         Parameters
         ----------
-        netlist_rtl : string
-            content of the netlist file
+        netlist_rtl : str
+            Content of the netlist file.
+        raw_parameters : str
+            Module's parameter list string.
 
         Returns
         -------
-        array
-            list of circuit intputs
+        tuple[list[str], list[str]]
+            raw_inputs and circuit_inputs
         '''
         raw_inputs = []
         circuit_inputs = []
@@ -193,28 +200,38 @@ def get_inputs(self, netlist_rtl):
             else:
                 circuit_inputs.append(f"{i[3]}")
                 raw_inputs.append(f"input {i[3]};")
+
+        circuit_inputs = sort_expanded_vars(circuit_inputs, raw_parameters)
+        raw_inputs = sort_raw_vars(raw_inputs, raw_parameters)
         return raw_inputs, circuit_inputs
 
 
-    def get_outputs(self, netlist_rtl):
+    def get_outputs(self, netlist_rtl, raw_parameters):
         '''
-        Extracts the output variables of the circuit
-        TODO: support one bit variables
+        Extracts the circuit's output variables.
+
+        Outputs are returned in two ways:
+        - `circuit_outputs`: expanded, sorted MSB→LSB, and follow the order in `raw_parameters`.
+        - `raw_outputs`: unexpanded lines, sorted to match the same order.
 
-        The `circuit_outputs` will be returned from MSB -> LSB. This is
-        important to provide the outputs in the correct order to methods that
-        map a circuit representation to a Verilog format, like the Decision Tree
-        method.
+        This ordering ensures compatibility with Verilog-generating methods like
+        the Decision Tree, which expect to receive bit-accurate and positionally
+        correct inputs in order to replicate them.
+
+        TODO: support one bit variables
 
         Parameters
         ----------
         netlist_rtl : string
             content of the netlist file
+        raw_parameters : str
+            Module's parameter list string.
+
 
         Returns
         -------
-        array
-            list of circuit outputs
+        tuple[list[str], list[str]]
+            raw_outputs and circuit_outputs
         '''
         raw_outputs = []
         circuit_outputs = []
@@ -234,6 +251,9 @@ def get_outputs(self, netlist_rtl):
             else:
                 circuit_outputs.append(f"{o[3]}")
                 raw_outputs.append(f"output {o[3]};")
+
+        circuit_outputs = sort_expanded_vars(circuit_outputs, raw_parameters)
+        raw_outputs = sort_raw_vars(raw_outputs, raw_parameters)
         return raw_outputs, circuit_outputs
 
 def expand_range(name):
@@ -364,3 +384,75 @@ def parse_assigns(content):
             raise ValueError(f"Bit width mismatch: LHS {lhs_bits} != RHS {rhs_bits}")
         result.extend(zip(lhs_bits, rhs_bits))
     return result
+
+def extract_param_names(raw_parameters):
+    """
+    Extracts the names of input/output/inout parameters from a module definition string.
+
+    Parameters
+    ----------
+    raw_parameters : str
+        String of the module's parameter list, e.g. "input a, output [3:0] b".
+
+    Returns
+    -------
+    list[str]
+        Ordered list of parameter names as they appear in the string.
+    """
+    return re.findall(
+        r"\b(?:input|output|inout)?\s*(?:\[.*?\]\s*)?(\w+)", raw_parameters
+    )
+
+
+def sort_expanded_vars(expanded_vars, raw_parameters):
+    """
+    Sorts a list of expanded signal names (e.g. in[3], in[2], ..., in[0])
+    based on their order in the module definition and from MSB to LSB.
+
+    Parameters
+    ----------
+    expanded_vars : list[str]
+        List of bit-level signal names.
+    raw_parameters : str
+        Module parameter list string for determining signal order.
+
+    Returns
+    -------
+    list[str]
+        Sorted list of expanded variables.
+    """
+    param_order = extract_param_names(raw_parameters)
+    order_map = {name: i for i, name in enumerate(param_order)}
+
+    def sort_key(var):
+        base, idx = re.match(r"(\w+)(?:\[(\d+)\])?", var).groups()
+        return (order_map[base], -int(idx) if idx else 0)
+
+    return sorted(expanded_vars, key=sort_key)
+
+
+def sort_raw_vars(raw_list, raw_parameters):
+    """
+    Sorts unexpanded input/output/inout declarations by the order of their
+    parameter names in the module definition.
+
+    Parameters
+    ----------
+    raw_list : list[str]
+        List of unexpanded variable declarations, e.g. "input [3:0] a;".
+    raw_parameters : str
+        Module parameter list string for determining signal order.
+
+    Returns
+    -------
+    list[str]
+        Sorted list of raw declarations.
+    """
+    param_order = extract_param_names(raw_parameters)
+    order_map = {name: i for i, name in enumerate(param_order)}
+
+    def sort_key(line):
+        match = re.search(r"(\w+)\s*;", line)
+        return order_map.get(match.group(1), float("inf")) if match else float("inf")
+
+    return sorted(raw_list, key=sort_key)

From 958eab5d66c19f6398bed3cc65eb6daab8a70db9 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Sat, 10 May 2025 17:54:03 -0600
Subject: [PATCH 24/65] Improve decision tree _tree_2_equation to assign
 constant instead of None or LEAF_NODE_1

---
 ml_algorithms/decision_tree.py | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/ml_algorithms/decision_tree.py b/ml_algorithms/decision_tree.py
index 33a0dde..c00de16 100644
--- a/ml_algorithms/decision_tree.py
+++ b/ml_algorithms/decision_tree.py
@@ -269,16 +269,17 @@ def _tree_2_equation(
 
     Returns
     -------
-    str or None
-        A Boolean expression string for the subtree rooted at `node`, or None if
+    str or int
+        A Boolean expression string for the subtree rooted at `node`, or an int
+        constant when the subtree's output is fixed: 1 if it is always 1, 0 if
         the subtree always evaluates to 0.
     """
     if tree.feature[node] == -2:  # Leaf node
         result = tree.value[node][output].argmax()
         if result == 0:
-            return None
+            return 0
         else:
-            return "LEAF_NODE_1"
+            return 1
 
     else:  # Internal node
         left_result = _tree_2_equation(
@@ -294,24 +295,24 @@ def _tree_2_equation(
         negated_input = f"!{input}"
 
         match (left_result, right_result):
-            case (None, None):
-                return None
+            case (0, 0):
+                return 0
 
-            case (None, "LEAF_NODE_1"):
+            case (0, 1):
                 return input
-            case ("LEAF_NODE_1", None):
+            case (1, 0):
                 return negated_input
-            case ("LEAF_NODE_1", "LEAF_NODE_1"):
-                return "LEAF_NODE_1"
+            case (1, 1):
+                return 1
 
-            case (str(left), "LEAF_NODE_1"):
+            case (str(left), 1):
                 return f"{input} | ({left})"
-            case ("LEAF_NODE_1", str(right)):
+            case (1, str(right)):
                 return f"{negated_input} | ({right})"
 
-            case (str(left), None):
+            case (str(left), 0):
                 return f"{negated_input} & ({left})"
-            case (None, str(right)):
+            case (0, str(right)):
                 return f"{input} & ({right})"
             case (str(left), str(right)):
                 return f"({negated_input} & ({left})) | ({input} & ({right}))"

From d69f45a3d33ddc18031685624329bf96211b987a Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Sun, 11 May 2025 17:36:09 -0600
Subject: [PATCH 25/65] Improve assign parser to also handle constants

---
 netlist.py | 128 +++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 89 insertions(+), 39 deletions(-)

diff --git a/netlist.py b/netlist.py
index 0043f7c..bf5e85b 100644
--- a/netlist.py
+++ b/netlist.py
@@ -256,19 +256,62 @@ def get_outputs(self, netlist_rtl, raw_parameters):
         raw_outputs = sort_raw_vars(raw_outputs, raw_parameters)
         return raw_outputs, circuit_outputs
 
-def expand_range(name):
-    '''
-    Expands a Verilog-style bit range expression into a list of individual bits.
+
+def expand_concat(expr):
+    """
+    Expands a Verilog-style concatenation expression into a flat list of
+    individual bits.
+
+    Parameters
+    ----------
+    expr : string
+        A Verilog signal, range, or concatenation like "{a[3:0], b[1], c}".
+
+    Returns
+    -------
+    List[string]
+        A list of strings like ["a[3]", "a[2]", "a[1]", "a[0]", "b[1]", "c"].
+
+    Examples
+    -------
+        >>> expand_concat("{ a[2:1], b[0] }")
+        ['a[2]', 'a[1]', 'b[0]']
+
+        >>> expand_concat("{ x[1], y[3:2], z }")
+        ['x[1]', 'y[3]', 'y[2]', 'z']
+
+        >>> expand_concat("a[3]")
+        ['a[3]']
+
+        >>> expand_concat("b[1:0]")
+        ['b[1]', 'b[0]']
+    """
+    expr = expr.strip()
+    if expr.startswith("{") and expr.endswith("}"):
+        inner = expr[1:-1]
+        parts = [p.strip() for p in inner.split(",")]
+        bits = []
+        for p in parts:
+            bits.extend(expand_range(p))
+        return bits
+    else:
+        return expand_range(expr)
+
+
+def expand_range(expr):
+    """
+    Expands a Verilog-style range expression into a flat list of individual bits
+    or constants.
 
     Parameters
     ----------
     name : string
-        A string like "a[3:0]" or "b[7]".
+        A Verilog signal, range or constant
 
     Returns
     -------
     List[string]
-        A list of strings like ["a[3]", "a[2]", "a[1]", "a[0]"].
+        A list of bit names (str) or, for a constant, bit values (int).
 
     Examples
     -------
@@ -283,54 +326,60 @@ def expand_range(name):
 
         >>> expand_range("z")
         ['z']
-    '''
-    m = re.match(r'(\w+)\[(\d+):(\d+)\]', name)
-    if not m:
-        return [name]
-    var, hi, lo = m.groups()
-    hi, lo = int(hi), int(lo)
-    step = -1 if hi > lo else 1
-    return [f"{var}[{i}]" for i in range(hi, lo + step, step)]
 
-def expand_concat(expr):
-    '''
-    Expands a Verilog-style concatenation expression into a flat list of
+        >>> expand_range("4'hd")
+        [1, 1, 0, 1]
+    """
+    expr = expr.strip()
+    if "'" in expr:
+        return expand_constant(expr)
+    elif "[" in expr:
+        if ":" in expr:
+            base, range_part = expr.split("[")
+            range_part = range_part[:-1]
+            start, end = map(int, range_part.split(":"))
+            return [f"{base}[{i}]" for i in range(start, end - 1, -1)]
+        else:
+            return [expr]
+    else:
+        return [expr]
+
+
+def expand_constant(expr):
+    """
+    Expands a Verilog-style sized constant (e.g. "4'hd") into a flat list of
     individual bits.
 
     Parameters
     ----------
     expr : string
-        A Verilog signal, range, or concatenation like "{a[3:0], b[1], c}".
+        A Verilog constant like "3'h6". Currently only supports hexadecimal
+        expressions, but that should be enough since that's how yosys assigns
+        constants.
 
     Returns
     -------
-    List[string]
-        A list of strings like ["a[3]", "a[2]", "a[1]", "a[0]", "b[1]", "c"].
+    List[int]
+        A list of bits like [1, 1, 0].
 
     Examples
     -------
-        >>> expand_concat("{ a[2:1], b[0] }")
-        ['a[2]', 'a[1]', 'b[0]']
+        >>> expand_constant("1'h1")
+        [1]
 
-        >>> expand_concat("{ x[1], y[3:2], z }")
-        ['x[1]', 'y[3]', 'y[2]', 'z']
+        >>> expand_constant("4'hd")
+        [1, 1, 0, 1]
+    """
+    size, value = expr.split("'h")
+    size = int(size)
+    value = value.lower()
 
-        >>> expand_concat("a[3]")
-        ['a[3]']
+    int_value = int(value, 16)
+
+    bits = [(int_value >> i) & 1 for i in range(size - 1, -1, -1)]
+
+    return bits
 
-        >>> expand_concat("b[1:0]")
-        ['b[1]', 'b[0]']
-    '''
-    expr = expr.strip()
-    if expr.startswith('{') and expr.endswith('}'):
-        inner = expr[1:-1]
-        parts = [p.strip() for p in inner.split(',')]
-        bits = []
-        for p in parts:
-            bits.extend(expand_range(p))
-        return bits
-    else:
-        return expand_range(expr)
 
 def parse_assigns(content):
     '''
@@ -374,7 +423,7 @@ def parse_assigns(content):
 
         assign out = { in1[0:1], in2[0:1] }
     '''
-    expreg = r'assign\s+(.*?)\s*=\s*(.*?);'
+    expreg = r"assign\s+(.*?)\s*=\s*(.*?);"
     assigns = re.findall(expreg, content)
     result = []
     for lhs, rhs in assigns:
@@ -385,6 +434,7 @@ def parse_assigns(content):
         result.extend(zip(lhs_bits, rhs_bits))
     return result
 
+
 def extract_param_names(raw_parameters):
     """
     Extracts the names of input/output/inout parameters from a module definition string.

From 777a6184113b9d6b7ee3518867f79ee96c548e30 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Mon, 12 May 2025 13:41:38 -0600
Subject: [PATCH 26/65] Remove EXACT_TB and APPROX_TB, no longer needed

---
 runner.py | 25 +++++++++----------------
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/runner.py b/runner.py
index a10ed43..ec38f63 100644
--- a/runner.py
+++ b/runner.py
@@ -24,8 +24,7 @@
 APPROX_OUTPUT = f"{BUILD_DIR}/.approx_output"
 RESYNTH_OUTPUT = f"{BUILD_DIR}/.resynth_output"
 
-EXACT_TB = f"{BUILD_DIR}/.exact_tb.v"
-APPROX_TB = f"{BUILD_DIR}/.approx_tb.v"
+TB = f"{BUILD_DIR}/.tb.v"
 
 VCD = f"{BUILD_DIR}/.vcd"
 SAIF = f"{BUILD_DIR}/.saif"
@@ -54,13 +53,13 @@ def run(config: ApproxSynthesisConfig) -> Results:
         os.makedirs(BUILD_DIR)
 
     config.circuit.write_tb(
-        EXACT_TB, config.dataset, show_progress=config.show_progress
+        TB, config.dataset, show_progress=config.show_progress
     )
-    config.circuit.exact_output(EXACT_TB, EXACT_OUTPUT)
+    config.circuit.exact_output(TB, EXACT_OUTPUT)
 
-    # The benchmark functions should return the final approximated circuit and
-    # also carry out the final simulation to generate the APPROX_OUTPUT that
-    # will be used to calculate error metrics.
+    # The benchmark functions should return the final approximated circuit (for
+    # area calculation) and also carry out the final simulation to generate the
+    # APPROX_OUTPUT that will be used to calculate error metrics.
     benchmark_fn: Callable[[ApproxSynthesisConfig], Circuit]
     match config.method:
         case AlsMethod.CONSTANT_INPUTS:
@@ -157,18 +156,12 @@ def _run_decision_tree(config: ApproxSynthesisConfig) -> Circuit:
         APPROX_RTL, exact_circuit.tech_file, topmodule=exact_circuit.topmodule
     )
 
-    tree_circuit.write_tb(
-        APPROX_TB,
-        config.dataset,
-        show_progress=config.show_progress,
-    )
-
     if not config.resynthesis:
-        tree_circuit.simulate(APPROX_TB, APPROX_OUTPUT)
+        tree_circuit.simulate(TB, APPROX_OUTPUT)
 
     else:
         tree_circuit.resynth()
-        tree_circuit.simulate(APPROX_TB, APPROX_OUTPUT)
+        tree_circuit.simulate(TB, APPROX_OUTPUT)
         error = compute_error(
             Metric.MEAN_RELATIVE_ERROR_DISTANCE, EXACT_OUTPUT, APPROX_OUTPUT
         )
@@ -193,7 +186,7 @@ def _run_decision_tree(config: ApproxSynthesisConfig) -> Circuit:
             resynth_circuit = Circuit(
                 RESYNTH_RTL, exact_circuit.tech_file, topmodule=exact_circuit.topmodule
             )
-            resynth_circuit.simulate(APPROX_TB, RESYNTH_OUTPUT)
+            resynth_circuit.simulate(TB, RESYNTH_OUTPUT)
 
             error = compute_error(
                 Metric.MEAN_RELATIVE_ERROR_DISTANCE, EXACT_OUTPUT, RESYNTH_OUTPUT

From 3939947b74385316b638f18ae0286469fcf30b4e Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Mon, 12 May 2025 16:46:35 -0600
Subject: [PATCH 27/65] Add InOuts methods to runner

---
 configuration.py |   6 ++
 runner.py        | 157 ++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 154 insertions(+), 9 deletions(-)

diff --git a/configuration.py b/configuration.py
index cbca0e6..371f9d7 100644
--- a/configuration.py
+++ b/configuration.py
@@ -328,6 +328,12 @@ def _validate_max_iters(
     ):
         raise ValueError(f"'max_iters' is required for {method} with resynthesis")
 
+    if max_iters is not None:
+        try:
+            return int(max_iters)
+        except ValueError:
+            raise ValueError("'max_iters' should be given as an integer")
+
 
 def _validate_max_depth(
     max_depth: int | None,
diff --git a/runner.py b/runner.py
index ec38f63..e125f77 100644
--- a/runner.py
+++ b/runner.py
@@ -2,9 +2,11 @@
 import csv
 import os
 import time
+from xml.etree import ElementTree
 from circuit import Circuit
 from circuiterror import compute_error
 from ml_algorithms.decision_tree import DecisionTreeCircuit
+from pruning_algorithms.inouts import GetInputs, GetOutputs
 from utils import read_dataset
 from configuration import AlsMethod, ApproxSynthesisConfig, Metric
 
@@ -22,7 +24,7 @@
 
 EXACT_OUTPUT = f"{BUILD_DIR}/.exact_output"
 APPROX_OUTPUT = f"{BUILD_DIR}/.approx_output"
-RESYNTH_OUTPUT = f"{BUILD_DIR}/.resynth_output"
+TEMP_OUTPUT = f"{BUILD_DIR}/.tmp_output"
 
 TB = f"{BUILD_DIR}/.tb.v"
 
@@ -52,9 +54,7 @@ def run(config: ApproxSynthesisConfig) -> Results:
     if not os.path.exists(BUILD_DIR):
         os.makedirs(BUILD_DIR)
 
-    config.circuit.write_tb(
-        TB, config.dataset, show_progress=config.show_progress
-    )
+    config.circuit.write_tb(TB, config.dataset, show_progress=config.show_progress)
     config.circuit.exact_output(TB, EXACT_OUTPUT)
 
     # The benchmark functions should return the final approximated circuit (for
@@ -97,6 +97,9 @@ def run(config: ApproxSynthesisConfig) -> Results:
         elapsed_time = end_time - start_time
         results[Metric.ALS_TIME] = elapsed_time
 
+    # For debugging or checking the final output
+    approx_circuit.write_to_disk(APPROX_RTL)
+
     if config.csv:
         _write_results_to_csv(config, results)
 
@@ -186,27 +189,115 @@ def _run_decision_tree(config: ApproxSynthesisConfig) -> Circuit:
             resynth_circuit = Circuit(
                 RESYNTH_RTL, exact_circuit.tech_file, topmodule=exact_circuit.topmodule
             )
-            resynth_circuit.simulate(TB, RESYNTH_OUTPUT)
+            resynth_circuit.simulate(TB, TEMP_OUTPUT)
 
             error = compute_error(
-                Metric.MEAN_RELATIVE_ERROR_DISTANCE, EXACT_OUTPUT, RESYNTH_OUTPUT
+                Metric.MEAN_RELATIVE_ERROR_DISTANCE, EXACT_OUTPUT, TEMP_OUTPUT
             )
 
             if error > config.error:
                 return tree_circuit
             else:
-                os.replace(RESYNTH_OUTPUT, APPROX_OUTPUT)
+                os.replace(TEMP_OUTPUT, APPROX_OUTPUT)
                 tree_circuit = resynth_circuit
 
     return tree_circuit
 
 
 def _run_constant_inputs(config: ApproxSynthesisConfig) -> Circuit:
-    return config.circuit  # TODO Implement method
+    return _run_constant_inputs_outputs(config, config.circuit.inputs, "inputs")
 
 
 def _run_constant_outputs(config: ApproxSynthesisConfig) -> Circuit:
-    return config.circuit  # TODO Implement method
+    return _run_constant_inputs_outputs(config, config.circuit.outputs, "outputs")
+
+
+def _run_constant_inputs_outputs(
+    config: ApproxSynthesisConfig, circuit_variables: list[str], inputs_or_outputs: str
+) -> Circuit:
+    """
+    The InOuts method accepts either a list of inputs or outputs to make
+    constant, and then returns a list of nodes that could be pruned.
+
+    The selection of which input/outputs to make constant is not part of the
+    InOuts method, so each user must select them under whichever criteria fits
+    their use case best.
+
+    For this runner execution, which must use a generic heuristic for any
+    circuit, we'll select the LSBs of each input/output to be constant. If we
+    manage to prune all the suggested nodes without going over the error
+    threshold or the max iterations then we can use the next LSB of each
+    input/output. For example, in a circuit with the input/outputs:
+
+        ["in1[2]", "in1[1]", "in1[0]", "in2[2]", "in2[1]", "in2[0]", "cin"]
+
+    We'll first prune the nodes suggested when ["in1[0]", "in2[0]", "cin"] are
+    set as constants. If we delete all the suggested nodes then we'll move on to
+    the nodes suggested when ["in1[1]", "in1[0]", "in2[1]", "in2[0]", "cin"] are
+    constants.
+    """
+    circuit = config.circuit
+
+    assert config.max_iters is not None, (
+        f"'max_iters' should be given when executing {config.method}"
+    )
+
+    assert config.error is not None, (
+        f"'error' should be given when executing {config.method}"
+    )
+
+    max_const_bit = 0
+    iteration = 0
+
+    while iteration < config.max_iters:
+        const_variables = _get_lsbs_up_to(circuit_variables, max_const_bit)
+        deletable_nodes: list[ElementTree.Element]
+
+        match inputs_or_outputs:
+            case "inputs":
+                deletable_nodes = GetInputs(circuit.netl_root, const_variables)
+            case "outputs":
+                deletable_nodes = GetOutputs(circuit.netl_root, const_variables)
+            case _:
+                raise ValueError("Invalid call to _run_constant_inputs_outputs")
+
+        # Skip nodes that are already marked as deleted
+        deletable_nodes = [
+            node for node in deletable_nodes if node.get("delete") != "yes"
+        ]
+
+        if len(deletable_nodes) == 0:
+            if set(const_variables) == set(circuit_variables):
+                # All variables have been set as const and all selected nodes
+                # have been deleted
+                return circuit
+            else:
+                max_const_bit += 1
+                continue
+
+        node_to_delete = deletable_nodes.pop(0)
+
+        print(f"Pruning node {node_to_delete.attrib['var']}")
+        node_to_delete.set("delete", "yes")
+
+        if config.resynthesis:
+            circuit.resynth()
+
+        error = circuit.simulate_and_compute_error(
+            TB, EXACT_OUTPUT, TEMP_OUTPUT, Metric.MEAN_RELATIVE_ERROR_DISTANCE
+        )
+
+        print(f"Pruned circuit error: {error}")
+
+        if iteration > 0 and error > config.error:
+            print("Error has overpassed threshold, undoing last prune\n")
+            node_to_delete.set("delete", "no")
+            break
+
+        iteration += 1
+        os.replace(TEMP_OUTPUT, APPROX_OUTPUT)
+
+    return circuit
 
 
 def _run_probrun(config: ApproxSynthesisConfig) -> Circuit:
@@ -229,3 +320,51 @@ def _compute_error_metrics(config: ApproxSynthesisConfig) -> Results:
             result[metric] = error
 
     return result
+
+
+def _get_lsbs_up_to(variables: list[str], bit_index: int) -> list[str]:
+    """
+    Get a list of circuit variables with bits up to a specified index.
+
+    Parameters
+    ----------
+    variables : list of str
+        A list of circuit variable strings, which may include bit indices in the format 'var[bit_index]' or just 'var'.
+    bit_index : int
+        The bit index up to which the variables should be included in the result.
+
+    Returns
+    -------
+    list of str
+        A list of variables that have a bit index less than or equal to the specified bit index.
+        If a variable does not have a bit index (e.g., 'cin'), it is included in the result.
+
+    Examples
+    --------
+    >>> variables = ["in1[2]", "in1[1]", "in1[0]", "in2[2]", "in2[1]", "in2[0]", "cin"]
+    >>> _get_lsbs_up_to(variables, 0)
+    ['in1[0]', 'in2[0]', 'cin']
+
+    >>> _get_lsbs_up_to(variables, 1)
+    ['in1[1]', 'in1[0]', 'in2[1]', 'in2[0]', 'cin']
+
+    >>> _get_lsbs_up_to(["out[0]", "out[1]", "out[2]"], 0)
+    ['out[0]']
+    """
+    result = []
+    for var in variables:
+        # Check if the variable is a string and contains a bit index
+        if "[" in var and "]" in var:
+            # Extract the bit index from the variable string
+            start = var.index("[") + 1
+            end = var.index("]")
+            var_bit_index = int(var[start:end])
+
+            # Check if the variable's bit index is less than or equal to the specified bit index
+            if var_bit_index <= bit_index:
+                result.append(var)
+        else:
+            # If it's not a variable with a bit index, add it directly (e.g., cin)
+            result.append(var)
+
+    return result

From 4bdddfbfa02c7724a338e09b24b1a7900e4bad7a Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Mon, 12 May 2025 18:02:16 -0600
Subject: [PATCH 28/65] Get rid of nonsensical decision tree resynthesis
 iteration

---
 configuration.py | 43 +++----------------------------------------
 runner.py        | 41 ++---------------------------------------
 2 files changed, 5 insertions(+), 79 deletions(-)

diff --git a/configuration.py b/configuration.py
index 371f9d7..9d0b827 100644
--- a/configuration.py
+++ b/configuration.py
@@ -32,7 +32,6 @@ class Metric(str, Enum):
     ALS_TIME = "time"
     AREA = "area"
 
-
     def to_user_friendly_display(self, value: float) -> str:
         """
         Formats the value to a user friendly string format for display.
@@ -42,7 +41,7 @@ def to_user_friendly_display(self, value: float) -> str:
         match self:
             # Percentage metrics
             case Metric.MEAN_RELATIVE_ERROR_DISTANCE | Metric.AREA:
-                return f"{round(value*100, 2)}%"
+                return f"{round(value * 100, 2)}%"
             case Metric.ALS_TIME:
                 return f"{round(value, 2)} s"
             # No special handling except rounding
@@ -50,7 +49,6 @@ def to_user_friendly_display(self, value: float) -> str:
                 return str(round(value, 2))
 
 
-
 # List of iterative methods.
 _ITERATIVE_METHODS = [
     AlsMethod.CONSTANT_INPUTS,
@@ -60,9 +58,6 @@ def to_user_friendly_display(self, value: float) -> str:
     AlsMethod.CCARVING,
 ]
 
-# List of methods that aren't iterative but can be when doing resynthesis.
-_ITERATIVE_METHODS_WITH_RESYNTHESIS = [AlsMethod.DECISION_TREE]
-
 
 class ApproxSynthesisConfig:
     """
@@ -163,8 +158,8 @@ def __init__(
         self.metrics = _validate_metrics(metrics)
 
         self.resynthesis = resynthesis
-        self.error = _validate_error(error, self.method, self.resynthesis)
-        self.max_iters = _validate_max_iters(max_iters, self.method, self.resynthesis)
+        self.error = _validate_error(error, self.method)
+        self.max_iters = max_iters
 
         self.max_depth = _validate_max_depth(max_depth, self.method)
         self.one_tree_per_output = one_tree_per_output
@@ -286,7 +281,6 @@ def _validate_dataset(
 def _validate_error(
     error: float | None,
     method: AlsMethod,
-    resynthesis: bool,
 ) -> float | None:
     """
     Validates 'error'.
@@ -300,41 +294,10 @@ def _validate_error(
     if method in _ITERATIVE_METHODS:
         if error is None:
             raise ValueError(f"'error' is required for method {method}")
-    elif (
-        method in _ITERATIVE_METHODS_WITH_RESYNTHESIS and resynthesis and error is None
-    ):
-        raise ValueError(f"'error' is required for method {method} with resynthesis")
 
     return error
 
 
-def _validate_max_iters(
-    max_iters: int | None, method: AlsMethod, resynthesis: bool
-) -> int | None:
-    """
-    Validates 'max_iters'.
-
-    Required for iterative methods.
-    Raises ValueError if missing in that case.
-    """
-
-    if method in _ITERATIVE_METHODS:
-        if max_iters is None:
-            raise ValueError(f"'max_iters' is required for method {method}")
-    if (
-        method in _ITERATIVE_METHODS_WITH_RESYNTHESIS
-        and resynthesis
-        and max_iters is None
-    ):
-        raise ValueError(f"'max_iters' is required for {method} with resynthesis")
-
-    if max_iters is not None:
-        try:
-            return int(max_iters)
-        except ValueError:
-            raise ValueError("'max_iters' should be given as an integer")
-
-
 def _validate_max_depth(
     max_depth: int | None,
     method: AlsMethod,
diff --git a/runner.py b/runner.py
index e125f77..6695345 100644
--- a/runner.py
+++ b/runner.py
@@ -159,47 +159,10 @@ def _run_decision_tree(config: ApproxSynthesisConfig) -> Circuit:
         APPROX_RTL, exact_circuit.tech_file, topmodule=exact_circuit.topmodule
     )
 
-    if not config.resynthesis:
-        tree_circuit.simulate(TB, APPROX_OUTPUT)
-
-    else:
+    if config.resynthesis:
         tree_circuit.resynth()
-        tree_circuit.simulate(TB, APPROX_OUTPUT)
-        error = compute_error(
-            Metric.MEAN_RELATIVE_ERROR_DISTANCE, EXACT_OUTPUT, APPROX_OUTPUT
-        )
-
-        assert isinstance(config.error, float), (
-            "'error' should be given when executing decision tree with resynthesis"
-        )
-
-        if error > config.error:
-            return tree_circuit
-
-        assert isinstance(config.max_iters, int), (
-            "'max_iters' should be given when executing decision tree with resynthesis"
-        )
-
-        for _ in range(config.max_iters):
-            last_output = read_dataset(APPROX_OUTPUT, 10)
-
-            tree.train(inputs, last_output)
-            tree.to_verilog_file(exact_circuit.topmodule, RESYNTH_RTL)
 
-            resynth_circuit = Circuit(
-                RESYNTH_RTL, exact_circuit.tech_file, topmodule=exact_circuit.topmodule
-            )
-            resynth_circuit.simulate(TB, TEMP_OUTPUT)
-
-            error = compute_error(
-                Metric.MEAN_RELATIVE_ERROR_DISTANCE, EXACT_OUTPUT, TEMP_OUTPUT
-            )
-
-            if error > config.error:
-                return tree_circuit
-            else:
-                os.replace(TEMP_OUTPUT, APPROX_OUTPUT)
-                tree_circuit = resynth_circuit
+    tree_circuit.simulate(TB, APPROX_OUTPUT)
 
     return tree_circuit
 

From d03d756c7295139d8c860ee84e8e09c5dc297d69 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Mon, 12 May 2025 18:10:09 -0600
Subject: [PATCH 29/65] Remove requirement for max_iters

---
 runner.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/runner.py b/runner.py
index 6695345..f8b1796 100644
--- a/runner.py
+++ b/runner.py
@@ -201,18 +201,15 @@ def _run_constant_inputs_outputs(
     """
     circuit = config.circuit
 
-    assert config.max_iters is not None, (
-        f"'max_iters' should be given when executing {config.method}"
-    )
-
     assert config.error is not None, (
         f"'error' should be given when executing {config.method}"
     )
 
     max_const_bit = 0
     iteration = 0
+    max_iters = config.max_iters if config.max_iters else float("inf")
 
-    while iteration < config.max_iters:
+    while iteration < max_iters:
         const_variables = _get_lsbs_up_to(circuit_variables, max_const_bit)
         deletable_nodes: list[ElementTree.Element]
 

From 9d862bd75e3ad9c9d64407bab7f56f33b0b06741 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Tue, 13 May 2025 11:32:10 -0600
Subject: [PATCH 30/65] Add validation option to config

---
 configuration.py | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/configuration.py b/configuration.py
index 9d0b827..a88d0a1 100644
--- a/configuration.py
+++ b/configuration.py
@@ -106,6 +106,21 @@ class ApproxSynthesisConfig:
     show_progress : bool, default=False
         Whether to show simulation progress.
 
+    validation : float (0 < x <= 1), optional
+        Specifies the proportion of the input dataset to be used for the
+        validation set.
+
+        If provided, the dataset will be split into two subsets: a test set and a
+        validation set.
+        The value of this parameter represents the percentage of the total
+        dataset that will be allocated to the validation set. For example, a
+        value of 0.2 means that 20% of the dataset will be used for validation,
+        while the remaining 80% will be used for testing during ALS.
+
+        This can help verify whether the generated solution will generalize well
+        to the rest of the possible circuit inputs that aren't part of the
+        dataset.
+
     csv : str, optional
         Path to a file to save the output in csv format.
         If the file doesn't exist, it will be created with a header for the
@@ -114,6 +129,13 @@ class ApproxSynthesisConfig:
         The output will be given as a single line with the following columns:
             method, circuit, resynthesis, error, max_iters, max_depth, one_tree_per_output, metric1, metric2, ...
 
+        If the 'validation' option is given, the metrics columns will look like:
+
+        metric1, v_metric1, metric2, v_metric2, ...
+
+        Where the metric prepended with 'v_' is the result of that metric on the
+        validation set. Applies only for error metrics.
+
         - bool values are stored as "True" or "False".
         - optional fields will just be left blank if not provided.
     """
@@ -128,6 +150,7 @@ class ApproxSynthesisConfig:
     max_depth: int | None
     one_tree_per_output: bool
     show_progress: bool
+    validation: float | None
     csv: str | None
 
     def __init__(
@@ -142,6 +165,7 @@ def __init__(
         max_depth: int | None = None,
         one_tree_per_output: bool = False,
         show_progress: bool = False,
+        validation: float | None = None,
         csv: str | None = None,
     ):
         """
@@ -165,6 +189,7 @@ def __init__(
         self.one_tree_per_output = one_tree_per_output
         self.show_progress = show_progress
         self.csv = csv
+        self.validation = _validate_validation(validation)
 
     @override
     def __repr__(self):
@@ -321,3 +346,19 @@ def _validate_max_depth(
                 raise ValueError("max_depth must be > 1")
 
             return max_depth
+
+
+def _validate_validation(validation: float | None) -> float | None:
+    """
+    Validates the 'validation' parameter.
+
+    Ensures that the value is a float within the range (0, 1].
+    If the value is None, it is considered valid and returned as is.
+
+    Raises ValueError if the value is not in the specified range.
+    """
+    if validation is not None:
+        if not (0 < validation <= 1):
+            raise ValueError("Validation value must be a float in the range (0, 1].")
+
+    return validation

From ddeca7dc942c74428d2ea626add770a2426bbb88 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Tue, 13 May 2025 11:40:36 -0600
Subject: [PATCH 31/65] Add --validation flag to CLI

---
 __main__.py      | 15 +++++++++++++--
 configuration.py | 38 +++++++++++++++++++-------------------
 2 files changed, 32 insertions(+), 21 deletions(-)

diff --git a/__main__.py b/__main__.py
index f1f728f..893763c 100644
--- a/__main__.py
+++ b/__main__.py
@@ -59,7 +59,9 @@ def main():
             metrics = [Metric.MEAN_RELATIVE_ERROR_DISTANCE, Metric.ALS_TIME]
 
         if not os.path.isfile(args.circuit):
-            parser.error(f"The path given for the circuit '{args.circuit}' does not exist.")
+            parser.error(
+                f"The path given for the circuit '{args.circuit}' does not exist."
+            )
 
         try:
             circuit = Circuit(args.circuit, TECH)
@@ -70,6 +72,7 @@ def main():
                 dataset=args.dataset,
                 resynthesis=args.resynthesis,
                 error=args.error,
+                validation=args.validation,
                 max_iters=args.max_iters,
                 max_depth=args.max_depth,
                 one_tree_per_output=args.one_tree_per_output,
@@ -121,6 +124,11 @@ def run_arguments(run_parser):
         type=float,
         help="Maximum error threshold to stop iterations. (0 < x <= 1). The error used is Mean Relative Error Distance.",
     )
+    run_parser.add_argument(
+        "--validation",
+        type=float,
+        help="Proportion of the dataset to allocate for validation (0 < x <= 1).",
+    )
     run_parser.add_argument(
         "--max-iters",
         type=int,
@@ -135,7 +143,9 @@ def run_arguments(run_parser):
         help="Use one tree per output for decision_tree",
     )
     run_parser.add_argument(
-        "--show-progress", action="store_true", help="Show the progress of simulations executed for the ALS."
+        "--show-progress",
+        action="store_true",
+        help="Show the progress of simulations executed for the ALS.",
     )
     run_parser.add_argument(
         "--csv",
@@ -148,6 +158,7 @@ def run_arguments(run_parser):
 
         - bool values are stored as "True" or "False".
         - optional fields will just be left blank if not provided.
+        - if 'validation' flag is given, error metrics will be given as: metric1, v_metric1, metric2, v_metric2, ...
         """,
     )
 
diff --git a/configuration.py b/configuration.py
index a88d0a1..e5e03eb 100644
--- a/configuration.py
+++ b/configuration.py
@@ -91,21 +91,6 @@ class ApproxSynthesisConfig:
 
         The error used is the Mean Relative Error Distance.
 
-    max_iters : int, optional
-        Maximum amount of iterations to execute. Used in iterative methods,
-        like pruning methods or ML methods with resynthesis.
-
-    max_depth : int, optional
-        Required for 'decision_tree'.
-
-    one_tree_per_output : bool, default=False
-        Used only by 'decision_tree' method.
-        If True, uses a separate tree per output.
-        If False, uses a single multi-output tree.
-
-    show_progress : bool, default=False
-        Whether to show simulation progress.
-
     validation : float (0 < x <= 1), optional
         Specifies the proportion of the input dataset to be used for the
         validation set.
@@ -121,6 +106,21 @@ class ApproxSynthesisConfig:
         to the rest of the possible circuit inputs that aren't part of the
         dataset.
 
+    max_iters : int, optional
+        Maximum amount of iterations to execute. Used in iterative methods,
+        like pruning methods or ML methods with resynthesis.
+
+    max_depth : int, optional
+        Required for 'decision_tree'.
+
+    one_tree_per_output : bool, default=False
+        Used only by 'decision_tree' method.
+        If True, uses a separate tree per output.
+        If False, uses a single multi-output tree.
+
+    show_progress : bool, default=False
+        Whether to show simulation progress.
+
     csv : str, optional
         Path to a file to save the output in csv format.
         If the file doesn't exist, it will be created with a header for the
@@ -146,11 +146,11 @@ class ApproxSynthesisConfig:
     metrics: list[Metric]
     resynthesis: bool
     error: float | None
+    validation: float | None
     max_iters: int | None
     max_depth: int | None
     one_tree_per_output: bool
     show_progress: bool
-    validation: float | None
     csv: str | None
 
     def __init__(
@@ -161,11 +161,11 @@ def __init__(
         metrics: list[Metric | str],
         resynthesis: bool = False,
         error: float | None = None,
+        validation: float | None = None,
         max_iters: int | None = None,
         max_depth: int | None = None,
         one_tree_per_output: bool = False,
         show_progress: bool = False,
-        validation: float | None = None,
         csv: str | None = None,
     ):
         """
@@ -183,13 +183,13 @@ def __init__(
 
         self.resynthesis = resynthesis
         self.error = _validate_error(error, self.method)
+        self.validation = _validate_validation(validation)
         self.max_iters = max_iters
 
         self.max_depth = _validate_max_depth(max_depth, self.method)
         self.one_tree_per_output = one_tree_per_output
         self.show_progress = show_progress
         self.csv = csv
-        self.validation = _validate_validation(validation)
 
     @override
     def __repr__(self):
@@ -359,6 +359,6 @@ def _validate_validation(validation: float | None) -> float | None:
     """
     if validation is not None:
         if not (0 < validation <= 1):
-            raise ValueError("Validation value must be a float in the range (0, 1].")
+            raise ValueError("'validation' value must be a float in the range 0 < x <= 1.")
 
     return validation

From 26eab158e374172d3cba8cac7085cca65a1fde77 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Wed, 14 May 2025 10:53:44 -0600
Subject: [PATCH 32/65] Add validation option logic to runner and csv
 generation

---
 __main__.py      |  12 +++--
 configuration.py |  70 ++++++++++++++++--------
 runner.py        | 136 ++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 167 insertions(+), 51 deletions(-)

diff --git a/__main__.py b/__main__.py
index 893763c..e22dd6d 100644
--- a/__main__.py
+++ b/__main__.py
@@ -85,13 +85,19 @@ def main():
         print("Configuration loaded successfully")
         print(config)
 
-        results = run(config)
+        (results, validation_results) = run(config)
 
         print("\n---- Results -----")
         for metric in config.metrics:
             value = results[metric]
             print(f"{metric.value}: {metric.to_user_friendly_display(value)}")
 
+        if validation_results:
+            print("\n---- Results on Validation Set -----")
+            for metric, value in validation_results.items():
+                print(f"{metric.value}: {metric.to_user_friendly_display(value)}")
+
+
     elif args.subcommand == "generate":
         generate_dataset(args)
 
@@ -127,7 +133,7 @@ def run_arguments(run_parser):
     run_parser.add_argument(
         "--validation",
         type=float,
-        help="Proportion of the dataset to allocate for validation (0 < x <= 1).",
+        help="Proportion of the dataset to allocate for validation (0 <= x < 1).",
     )
     run_parser.add_argument(
         "--max-iters",
@@ -192,7 +198,7 @@ def generate_dataset(args: argparse.Namespace):
     if isinstance(size, float):
         if not (0 < size <= 1.0):
             raise argparse.ArgumentTypeError(
-                f"Dataset size must be greater than 0: {size}"
+                f"Dataset size as a percentage of total inputs must be between 0 < x <= 1.0: {size}"
             )
 
         max_inputs = 2 ** (len(circuit.inputs))
diff --git a/configuration.py b/configuration.py
index e5e03eb..ce20ef8 100644
--- a/configuration.py
+++ b/configuration.py
@@ -48,6 +48,17 @@ def to_user_friendly_display(self, value: float) -> str:
             case _:
                 return str(round(value, 2))
 
+    def is_error_metric(self) -> bool:
+        # This set should contain all the Metrics that are related to
+        # approximation errors.
+        return self in {
+            Metric.HAMMING_DISTANCE,
+            Metric.MEAN_ERROR_DISTANCE,
+            Metric.WORST_CASE_ERROR,
+            Metric.MEAN_RELATIVE_ERROR_DISTANCE,
+            Metric.MEAN_SQUARED_ERROR_DISTANCE,
+        }
+
 
 # List of iterative methods.
 _ITERATIVE_METHODS = [
@@ -91,20 +102,20 @@ class ApproxSynthesisConfig:
 
         The error used is the Mean Relative Error Distance.
 
-    validation : float (0 < x <= 1), optional
-        Specifies the proportion of the input dataset to be used for the
+    validation : float (0 <= x < 1), optional
+        Specifies the proportion of the input dataset to be allocated to the
         validation set.
 
-        If provided, the dataset will be split into two subsets: a test set and a
-        validation set.
-        The value of this parameter represents the percentage of the total
-        dataset that will be allocated to the validation set. For example, a
-        value of 0.2 means that 20% of the dataset will be used for validation,
-        while the remaining 80% will be used for testing during ALS.
+        If provided, the dataset will be split into a validation set and a test
+        set. For example, a value of 0.2 means 20% of the dataset will be used
+        for validation, while 80% will be used for testing during ALS. This helps
+        assess whether the generated solution generalizes well to unseen circuit
+        inputs.
 
-        This can help verify whether the generated solution will generalize well
-        to the rest of the possible circuit inputs that aren't part of the
-        dataset.
+        A value of 0 indicates that the full dataset will be used for training,
+        similar to not providing this parameter. However, it can be useful when
+        generating csv data because the columns will be formatted to align with
+        other circuits that are using a validation set.
 
     max_iters : int, optional
         Maximum amount of iterations to execute. Used in iterative methods,
@@ -122,22 +133,23 @@ class ApproxSynthesisConfig:
         Whether to show simulation progress.
 
     csv : str, optional
-        Path to a file to save the output in csv format.
-        If the file doesn't exist, it will be created with a header for the
-        columns, if it exists it will be appended to.
+        Path to a file for saving the output in CSV format. If the file does not
+        exist, it will be created with a header; if it exists, the output will be
+        appended.
 
         The output will be given as a single line with the following columns:
             method, circuit, resynthesis, error, max_iters, max_depth, one_tree_per_output, metric1, metric2, ...
 
-        If the 'validation' option is given, the metrics columns will look like:
+        If the 'validation' option is given, the metrics will include validation results, formatted as:
 
-        metric1, v_metric1, metric2, v_metric2, ...
+            metric1, v_metric1, metric2, v_metric2, ...
 
-        Where the metric prepended with 'v_' is the result of that metric on the
-        validation set. Applies only for error metrics.
+        Where 'v_' indicates the metric's result on the validation set. This
+        applies only to error metrics.
 
         - bool values are stored as "True" or "False".
-        - optional fields will just be left blank if not provided.
+        - optional fields (error, max_iters, max_depth, one_tree_per_output) will
+        just be left blank if not provided.
     """
 
     method: AlsMethod
@@ -212,15 +224,23 @@ def csv_columns(self) -> list[str]:
         ]
         for metric in self.metrics:
             columns.append(metric.value)
+            if self.validation is not None and metric.is_error_metric():
+                columns.append(f"v_{metric.value}")
 
         return columns
 
-    def csv_values(self, results: dict[Metric, float]) -> list[str]:
+    def csv_values(
+        self,
+        results: dict[Metric, float],
+        validation_results: None | dict[Metric, float],
+    ) -> list[str]:
         """
         Returns the values of the columns if exporting this config's execution to
         a CSV row.
         A Results dict must be provided, it is assumed it contains the results
         for the metrics given to this config.
+        A validation Results dict can be provided, if it is, it's assumed it
+        contains the results for the error metrics provided to this config.
         """
         values = [
             self.method.value,
@@ -234,6 +254,10 @@ def csv_values(self, results: dict[Metric, float]) -> list[str]:
 
         for metric in self.metrics:
             values.append(results[metric])
+            if validation_results is not None and metric.is_error_metric():
+                # We use `get` because the metric might not be in the dict
+                # if the 'validation' option was given with a value of 0
+                values.append(validation_results.get(metric, None))
 
         stringified_values = [
             str(value) if value is not None else "" for value in values
@@ -358,7 +382,9 @@ def _validate_validation(validation: float | None) -> float | None:
     Raises ValueError if the value is not in the specified range.
     """
     if validation is not None:
-        if not (0 < validation <= 1):
-            raise ValueError("'validation' value must be a float in the range 0 < x <= 1.")
+        if not (0 <= validation < 1.0):
+            raise ValueError(
+                "'validation' value must be a float in the range 0 <= x < 1."
+            )
 
     return validation
diff --git a/runner.py b/runner.py
index f8b1796..b128849 100644
--- a/runner.py
+++ b/runner.py
@@ -1,3 +1,4 @@
+from collections import deque
 from collections.abc import Callable
 import csv
 import os
@@ -28,21 +29,16 @@
 
 TB = f"{BUILD_DIR}/.tb.v"
 
+VALIDATION_DATASET = f"{BUILD_DIR}/.v_dataset"
+VALIDATION_TB = f"{BUILD_DIR}/.v_tb.v"
+VALIDATION_EXACT_OUTPUT = f"{BUILD_DIR}/.v_exact_output"
+VALIDATION_APPROX_OUTPUT = f"{BUILD_DIR}/.v_approx_output"
+
 VCD = f"{BUILD_DIR}/.vcd"
 SAIF = f"{BUILD_DIR}/.saif"
 
-# This list should contain all the Metrics that are related to approximation
-# errors.
-_APPROXIMATION_ERROR_METRICS: list[Metric] = [
-    Metric.HAMMING_DISTANCE,
-    Metric.MEAN_ERROR_DISTANCE,
-    Metric.WORST_CASE_ERROR,
-    Metric.MEAN_RELATIVE_ERROR_DISTANCE,
-    Metric.MEAN_SQUARED_ERROR_DISTANCE,
-]
-
 
-def run(config: ApproxSynthesisConfig) -> Results:
+def run(config: ApproxSynthesisConfig) -> tuple[Results, Results | None]:
     """
     Runner function for an execution specified by a valid ApproxSynthesisConfig.
     This function will do the following steps:
@@ -50,16 +46,27 @@ def run(config: ApproxSynthesisConfig) -> Results:
     - Carry out the given ALS method
     - Calculate the metrics given in config.metrics and return them as a Results
       dict.
+
+    If the 'validation' option is set, it will also return a second Results
+    object which contains the error metrics specified, calculated on the
+    validation set. It won't include non-error metrics like area or time since
+    those won't change based on the dataset.
     """
     if not os.path.exists(BUILD_DIR):
         os.makedirs(BUILD_DIR)
 
-    config.circuit.write_tb(TB, config.dataset, show_progress=config.show_progress)
-    config.circuit.exact_output(TB, EXACT_OUTPUT)
+    _create_tbs_and_exact_outputs(config)
 
     # The benchmark functions should return the final approximated circuit (for
-    # area calculation) and also carry out the final simulation to generate the
-    # APPROX_OUTPUT that will be used to calculate error metrics.
+    # area calculation and validation simulation) and also carry out the final
+    # simulation to generate the APPROX_OUTPUT that will be used to calculate
+    # error metrics.
+    #
+    # We want the benchmark_fn to carry out this final simulation because a lot
+    # of the methods need to carry out simulations in order to iterate (for
+    # example the constant inputs and outputs methods), so it's better to make
+    # use of those simulations instead of re-running the same sim outside of the
+    # benchmark_fn.
     benchmark_fn: Callable[[ApproxSynthesisConfig], Circuit]
     match config.method:
         case AlsMethod.CONSTANT_INPUTS:
@@ -80,12 +87,21 @@ def run(config: ApproxSynthesisConfig) -> Results:
     # - Execution of ALS method.
     # - Simulation of approximated circuit.
     # - Calculation of all metrics.
+    #
+    # We include simulations and calculation of all metrics because, even though
+    # they don't contribute directly to generating the final circuit, they are
+    # a necessary part of ALS in order to learn the circuit's characteristics
+    # and whether it's a worthwhile candidate.
     start_time = time.perf_counter()
 
     original_area = float(config.circuit.get_area())
 
     approx_circuit = benchmark_fn(config)
-    results = _compute_error_metrics(config)
+
+    if config.validation is not None:
+        approx_circuit.simulate(VALIDATION_TB, VALIDATION_APPROX_OUTPUT)
+
+    results, validation_results = _compute_error_metrics(config)
 
     if Metric.AREA in config.metrics:
         approx_area = float(approx_circuit.get_area())
@@ -101,9 +117,9 @@ def run(config: ApproxSynthesisConfig) -> Results:
     approx_circuit.write_to_disk(APPROX_RTL)
 
     if config.csv:
-        _write_results_to_csv(config, results)
+        _write_results_to_csv(config, results, validation_results)
 
-    return results
+    return results, validation_results
 
 
 def _create_saif(config: ApproxSynthesisConfig):
@@ -123,7 +139,9 @@ def _create_saif(config: ApproxSynthesisConfig):
     config.circuit.generate_saif_from_vcd(SAIF, VCD)
 
 
-def _write_results_to_csv(config: ApproxSynthesisConfig, results: Results):
+def _write_results_to_csv(
+    config: ApproxSynthesisConfig, results: Results, validation_results: None | Results
+):
     """
     Writes the execution results to a CSV file as a single row.
     """
@@ -138,13 +156,17 @@ def _write_results_to_csv(config: ApproxSynthesisConfig, results: Results):
         # If the file does not exist, write the header
         if not file_exists:
             writer.writerow(config.csv_columns())
-        writer.writerow(config.csv_values(results))
+
+        print("VALIDATION RESULTS:", validation_results)
+        writer.writerow(config.csv_values(results, validation_results))
 
 
 def _run_decision_tree(config: ApproxSynthesisConfig) -> Circuit:
     exact_circuit = config.circuit
-    inputs = read_dataset(config.dataset, 16)
     outputs = read_dataset(EXACT_OUTPUT, 10)
+    inputs = read_dataset(config.dataset, 16, max_lines=len(outputs))
+    # We use max_lines because the output set might be smaller due to a
+    # validation set being used
 
     tree = DecisionTreeCircuit(
         exact_circuit.inputs,
@@ -272,14 +294,26 @@ def _run_ccarving(config: ApproxSynthesisConfig) -> Circuit:
     return config.circuit  # TODO Implement method
 
 
-def _compute_error_metrics(config: ApproxSynthesisConfig) -> Results:
-    result: Results = {}
+def _compute_error_metrics(
+    config: ApproxSynthesisConfig,
+) -> tuple[Results, Results | None]:
+    results: Results = {}
+    if config.validation is not None:
+        validation_results = {}
+    else:
+        validation_results = None
+
     for metric in config.metrics:
-        if metric in _APPROXIMATION_ERROR_METRICS:
+        if metric.is_error_metric():
             error = compute_error(metric.value, EXACT_OUTPUT, APPROX_OUTPUT)
-            result[metric] = error
+            results[metric] = error
+            if validation_results is not None and config.validation != 0:
+                error = compute_error(
+                    metric.value, VALIDATION_EXACT_OUTPUT, VALIDATION_APPROX_OUTPUT
+                )
+                validation_results[metric] = error
 
-    return result
+    return results, validation_results
 
 
 def _get_lsbs_up_to(variables: list[str], bit_index: int) -> list[str]:
@@ -328,3 +362,53 @@ def _get_lsbs_up_to(variables: list[str], bit_index: int) -> list[str]:
             result.append(var)
 
     return result
+
+
+def _create_tbs_and_exact_outputs(config: ApproxSynthesisConfig):
+    """
+    Generate test and validation testbenches along with their exact outputs
+    based on the provided configuration.
+
+    If a validation fraction is specified, the function splits the dataset into
+    test and validation sets, creating corresponding testbenches and exact
+    outputs; otherwise, it creates a single testbench using the full dataset.
+    """
+    if config.validation is not None:
+        # Count the number of inputs in the dataset
+        with open(config.dataset, "r") as file:
+            total_lines = sum(1 for _ in file)
+
+        # Create TB that reads only the test dataset
+        test_dataset_size = int(round((1 - config.validation) * total_lines))
+        config.circuit.write_tb(
+            TB,
+            config.dataset,
+            show_progress=config.show_progress,
+            iterations=test_dataset_size,
+        )
+        config.circuit.exact_output(TB, EXACT_OUTPUT)
+
+        # Create validation TB
+        validation_dataset_size = int(round(config.validation * total_lines))
+        _copy_last_n_lines(config.dataset, VALIDATION_DATASET, validation_dataset_size)
+        config.circuit.write_tb(
+            VALIDATION_TB, VALIDATION_DATASET, show_progress=config.show_progress
+        )
+        config.circuit.exact_output(VALIDATION_TB, VALIDATION_EXACT_OUTPUT)
+    else:
+        # No validation set, just create a regular TB using the full dataset
+        config.circuit.write_tb(TB, config.dataset, show_progress=config.show_progress)
+        config.circuit.exact_output(TB, EXACT_OUTPUT)
+
+
+def _copy_last_n_lines(input_file: str, output_file: str, n: int) -> None:
+    """
+    Copy the last N lines from an input file to an output file.
+
+    Used to create a validation dataset from the original dataset.
+    """
+    with open(input_file, "r") as infile:
+        last_n_lines = deque(infile, maxlen=n)
+
+    with open(output_file, "w") as outfile:
+        outfile.writelines(last_n_lines)

From 4c937869286aa7d09e5f9ed1c95d1b53dfea965d Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Wed, 14 May 2025 18:06:23 -0600
Subject: [PATCH 33/65] Add ProbPrun method to runner

---
 circuit.py       |  5 ++--
 configuration.py |  6 ++---
 runner.py        | 64 +++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 64 insertions(+), 11 deletions(-)

diff --git a/circuit.py b/circuit.py
index 011877c..fe8dfca 100644
--- a/circuit.py
+++ b/circuit.py
@@ -532,7 +532,7 @@ def exact_output (self, testbench, output_file):
 
     def simulate(self, testbench, approximate_output):
         '''
-        Simulates the actual circuit tree (with deletions)
+        Simulates the circuit tree with deletions.
         Creates an executable using icarus, end then execute it to obtain the
         output of the testbench
 
@@ -787,7 +787,8 @@ def write_tb(self, filename, dataset_file, iterations=None, timescale= '10ns / 1
             text += '$display("-- Beginning Simulation --");\n\n'
 
         if dump_vcd:
-            text=f'{text} $dumpfile("{dump_vcd}");\n' \
+            relative_vcd_path = os.path.relpath(dump_vcd, start=os.path.dirname(filename))
+            text=f'{text} $dumpfile("{relative_vcd_path}");\n' \
                  f' $dumpvars(0,{self.topmodule}_tb);\n'
 
         relative_dataset_path = os.path.relpath(dataset_file, start=os.path.dirname(filename))
diff --git a/configuration.py b/configuration.py
index ce20ef8..ce29153 100644
--- a/configuration.py
+++ b/configuration.py
@@ -9,7 +9,7 @@
 class AlsMethod(str, Enum):
     CONSTANT_INPUTS = "inconst"
     CONSTANT_OUTPUTS = "outconst"
-    PROBRUN = "probrun"
+    PROBPRUN = "probprun"
     SIGNIFICANCE = "significance"
     CCARVING = "ccarving"
     DECISION_TREE = "decision_tree"
@@ -64,7 +64,7 @@ def is_error_metric(self) -> bool:
 _ITERATIVE_METHODS = [
     AlsMethod.CONSTANT_INPUTS,
     AlsMethod.CONSTANT_OUTPUTS,
-    AlsMethod.PROBRUN,
+    AlsMethod.PROBPRUN,
     AlsMethod.SIGNIFICANCE,
     AlsMethod.CCARVING,
 ]
@@ -78,7 +78,7 @@ class ApproxSynthesisConfig:
     ----------
     method : AlsMethod | str
         One of the supported methods. Can use the AlsMethod enum or one of the
-        following string names: 'inconst', 'outconst', 'probrun',
+        following string names: 'inconst', 'outconst', 'probprun',
         'significance', 'ccarving', or 'decision_tree'.
 
     circuit : Circuit
diff --git a/runner.py b/runner.py
index b128849..1692dfa 100644
--- a/runner.py
+++ b/runner.py
@@ -8,6 +8,7 @@
 from circuiterror import compute_error
 from ml_algorithms.decision_tree import DecisionTreeCircuit
 from pruning_algorithms.inouts import GetInputs, GetOutputs
+from pruning_algorithms.probprun import GetOneNode
 from utils import read_dataset
 from configuration import AlsMethod, ApproxSynthesisConfig, Metric
 
@@ -35,6 +36,7 @@
 VALIDATION_APPROX_OUTPUT = f"{BUILD_DIR}/.v_approx_output"
 
 VCD = f"{BUILD_DIR}/.vcd"
+VCD_TB = f"{BUILD_DIR}/.vcd_tb.v"
 SAIF = f"{BUILD_DIR}/.saif"
 
 
@@ -73,9 +75,9 @@ def run(config: ApproxSynthesisConfig) -> tuple[Results, Results | None]:
             benchmark_fn = _run_constant_inputs
         case AlsMethod.CONSTANT_OUTPUTS:
             benchmark_fn = _run_constant_outputs
-        case AlsMethod.PROBRUN:
+        case AlsMethod.PROBPRUN:
             _create_saif(config)
-            benchmark_fn = _run_probrun
+            benchmark_fn = _run_probprun
         case AlsMethod.SIGNIFICANCE:
             benchmark_fn = _run_significance
         case AlsMethod.CCARVING:
@@ -127,8 +129,6 @@ def _create_saif(config: ApproxSynthesisConfig):
     Create a SAIF file and annotate the circuit with its data
     """
 
-    VCD_TB = f"{BUILD_DIR}/.vcd_tb.v"
-
     config.circuit.write_tb(
         VCD_TB,
         config.dataset,
@@ -282,8 +282,60 @@ def _run_constant_inputs_outputs(
     return circuit
 
 
-def _run_probrun(config: ApproxSynthesisConfig) -> Circuit:
-    return config.circuit  # TODO Implement method
+def _run_probprun(config: ApproxSynthesisConfig) -> Circuit:
+    # TODO (Possible improvement): After pruning some nodes, if we re-simulate
+    # the circuit re-generating the vcd file, and with the new vcd re-generate
+    # the SAIF, one can notice a different timing behaviour from the remaining
+    # existing nodes. We don't re-simulate and re-generate the SAIF because the
+    # python method to regenerate the SAIF takes really long even for small
+    # datasets. (Example: BK_16b, 4000 inputs, takes ~30 seconds to generate
+    # SAIF) But, if we find a way to generate the SAIF file quickly, for example
+    # using a faster language for it, we might want to regenerate it on every
+    # iteration or every N iterations.
+
+    circuit = config.circuit
+    circuit_root = circuit.netl_root
+
+    assert config.error is not None, (
+        f"'error' should be given when executing {config.method}"
+    )
+
+    iteration = 0
+    max_iters = config.max_iters if config.max_iters else float("inf")
+
+    for node, output, time_percent in GetOneNode(circuit_root):
+        if iteration >= max_iters:
+            break
+
+        node_to_delete = circuit_root.find(f"./node[@var='{node}']")
+
+        assert node_to_delete is not None, (
+            f"Node {node} suggested by ProbPrun should be findable in the circuit"
+        )
+
+        print(
+            f"Pruning node {node_to_delete} because it's {output} {time_percent}% of the time"
+        )
+        node_to_delete.set("delete", "yes")
+
+        if config.resynthesis:
+            circuit.resynth()
+
+        error = circuit.simulate_and_compute_error(
+            TB, EXACT_OUTPUT, TEMP_OUTPUT, Metric.MEAN_RELATIVE_ERROR_DISTANCE
+        )
+
+        print(f"Pruned circuit error: {error}")
+
+        if iteration > 0 and error > config.error:
+            print("Error has overpassed threshold, undoing last prune\n")
+            node_to_delete.set("delete", "no")
+            break
+
+        iteration += 1
+        os.replace(TEMP_OUTPUT, APPROX_OUTPUT)
+
+    return circuit
 
 
 def _run_significance(config: ApproxSynthesisConfig) -> Circuit:

From 54a20e35661916d95a1975521b81d1cc0eef31ee Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Wed, 14 May 2025 18:40:12 -0600
Subject: [PATCH 34/65] Add significance method to runner

---
 runner.py | 48 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/runner.py b/runner.py
index 1692dfa..0ad896d 100644
--- a/runner.py
+++ b/runner.py
@@ -7,6 +7,7 @@
 from circuit import Circuit
 from circuiterror import compute_error
 from ml_algorithms.decision_tree import DecisionTreeCircuit
+from pruning_algorithms.glpsignificance import GetbySignificance
 from pruning_algorithms.inouts import GetInputs, GetOutputs
 from pruning_algorithms.probprun import GetOneNode
 from utils import read_dataset
@@ -339,7 +340,52 @@ def _run_probprun(config: ApproxSynthesisConfig) -> Circuit:
 
 
 def _run_significance(config: ApproxSynthesisConfig) -> Circuit:
-    return config.circuit  # TODO Implement method
+    circuit = config.circuit
+    circuit_root = circuit.netl_root
+
+    assert config.error is not None, (
+        f"'error' should be given when executing {config.method}"
+    )
+
+    iteration = 0
+    max_iters = config.max_iters if config.max_iters else float("inf")
+
+    # TODO: Allow specifying the output significances in the config
+    output_significances = []
+
+    for node, significance in GetbySignificance(circuit_root):
+        if iteration >= max_iters:
+            break
+
+        node_to_delete = circuit_root.find(f"./node[@var='{node}']")
+
+        assert node_to_delete is not None, (
+            f"Node {node} suggested by GetbySignificance should be findable in the circuit"
+        )
+
+        print(
+            f"Pruning node {node} because its significance is {significance}"
+        )
+        node_to_delete.set("delete", "yes")
+
+        if config.resynthesis:
+            circuit.resynth()
+
+        error = circuit.simulate_and_compute_error(
+            TB, EXACT_OUTPUT, TEMP_OUTPUT, Metric.MEAN_RELATIVE_ERROR_DISTANCE
+        )
+
+        print(f"Pruned circuit error: {error}")
+
+        if iteration > 0 and error > config.error:
+            print("Error has overpassed threshold, undoing last prune\n")
+            node_to_delete.set("delete", "no")
+            break
+
+        iteration += 1
+        os.replace(TEMP_OUTPUT, APPROX_OUTPUT)
+
+    return circuit
 
 
 def _run_ccarving(config: ApproxSynthesisConfig) -> Circuit:

From b63ae5c17fd134b0f67b74c855a21833fb8538b8 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 15 May 2025 13:56:42 -0600
Subject: [PATCH 35/65] Add --saif flag to CLI

---
 __main__.py |  8 +++++++-
 runner.py   | 13 ++++++++++++-
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/__main__.py b/__main__.py
index e22dd6d..c90b297 100644
--- a/__main__.py
+++ b/__main__.py
@@ -64,7 +64,9 @@ def main():
             )
 
         try:
-            circuit = Circuit(args.circuit, TECH)
+            saif = args.saif or ""
+
+            circuit = Circuit(args.circuit, TECH, saif)
             config = ApproxSynthesisConfig(
                 method=args.method,
                 circuit=circuit,
@@ -122,6 +124,10 @@ def run_arguments(run_parser):
         # TODO: Add docs about what each metric is
         help="Metrics to calculate, defaults to mred and time.",
     )
+    run_parser.add_argument(
+        "--saif",
+        help="SAIF file for the circuit. Used by 'probprun' method. If not provided, one will be generated during execution.",
+    )
     run_parser.add_argument(
         "--resynthesis", action="store_true", help="If provided will use resynthesis."
     )
diff --git a/runner.py b/runner.py
index 0ad896d..140b85c 100644
--- a/runner.py
+++ b/runner.py
@@ -127,9 +127,20 @@ def run(config: ApproxSynthesisConfig) -> tuple[Results, Results | None]:
 
 def _create_saif(config: ApproxSynthesisConfig):
     """
-    Create a SAIF file and annotate the circuit with its data
+    If the circuit doesn't have timing information, create a SAIF file and
+    annotate the circuit with its data.
     """
 
+    nodes = config.circuit.netl_root.findall("node")
+    node_outputs = [node.findall("output")[0] for node in nodes]
+    circuit_has_timing_info = all(
+        "t1" in node_output.attrib for node_output in node_outputs
+    )
+
+    if circuit_has_timing_info:
+        print("Circuit already has timing info, skipping SAIF generation")
+        return
+
     config.circuit.write_tb(
         VCD_TB,
         config.dataset,

From 66fcd582eb2e525586a3dcde5aec2f7fb0f6b999 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 15 May 2025 14:08:52 -0600
Subject: [PATCH 36/65] Remove max_iters from csv and leave TODO about how csv
 format is arbitrary

---
 configuration.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/configuration.py b/configuration.py
index ce29153..0311468 100644
--- a/configuration.py
+++ b/configuration.py
@@ -138,7 +138,7 @@ class ApproxSynthesisConfig:
         appended.
 
         The output will be given as a single line with the following columns:
-            method, circuit, resynthesis, error, max_iters, max_depth, one_tree_per_output, metric1, metric2, ...
+            method, circuit, resynthesis, error, max_depth, one_tree_per_output, metric1, metric2, ...
 
         If the 'validation' option is given, the metrics will include validation results, formatted as:
 
@@ -148,9 +148,16 @@ class ApproxSynthesisConfig:
         applies only to error metrics.
 
         - bool values are stored as "True" or "False".
-        - optional fields (error, max_iters, max_depth, one_tree_per_output) will
+        - optional fields (error, max_depth, one_tree_per_output) will
         just be left blank if not provided.
     """
+    # TODO: the configuration options included in the csv: resynthesis, error,
+    # max_depth and one_tree_per_output; were chosen arbitrarily and are not
+    # necessarily more interesting than other options not included. Perhaps the
+    # configuration options included in the CSV should also be configurable, or
+    # we should include any options that are not None, or we should always
+    # include every single possible option and metric in the csv, even those
+    # not specified.
 
     method: AlsMethod
     circuit: Circuit
@@ -218,7 +225,6 @@ def csv_columns(self) -> list[str]:
             "circuit",
             "resynthesis",
             "error",
-            "max_iters",
             "max_depth",
             "one_tree_per_output",
         ]
@@ -247,7 +253,6 @@ def csv_values(
             self.circuit.topmodule,
             self.resynthesis,
             self.error,
-            self.max_iters,
             self.max_depth,
             self.one_tree_per_output,
         ]

From 1b2890a6e51c1b6267bac880aed93c2ad4e9086f Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 15 May 2025 14:28:30 -0600
Subject: [PATCH 37/65] Add output significances to config object

---
 configuration.py | 37 ++++++++++++++++++++++++++++++++++++-
 runner.py        | 14 +++++++-------
 2 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/configuration.py b/configuration.py
index 0311468..cc7fa61 100644
--- a/configuration.py
+++ b/configuration.py
@@ -129,6 +129,15 @@ class ApproxSynthesisConfig:
         If True, uses a separate tree per output.
         If False, uses a single multi-output tree.
 
+    output_significances: list[int]
+        List of significances of the circuit outputs. Should match the number of
+        output bits of the circuit.
+
+        If not provided a significance of 2**i will be assumed, where i is the
+        index of the output bit. (The LSB has less significance than the MSB.)
+
+        Used by the 'significance' and 'ccarving' methods.
+
     show_progress : bool, default=False
         Whether to show simulation progress.
 
@@ -151,6 +160,7 @@ class ApproxSynthesisConfig:
         - optional fields (error, max_depth, one_tree_per_output) will
         just be left blank if not provided.
     """
+
     # TODO: the configuration options included in the csv: resynthesis, error,
     # max_depth and one_tree_per_output; were chosen arbitrarily and are not
     # necessarily more interesting than other options not included. Perhaps the
@@ -169,6 +179,7 @@ class ApproxSynthesisConfig:
     max_iters: int | None
     max_depth: int | None
     one_tree_per_output: bool
+    output_significances: list[int] | None
     show_progress: bool
     csv: str | None
 
@@ -184,6 +195,7 @@ def __init__(
         max_iters: int | None = None,
         max_depth: int | None = None,
         one_tree_per_output: bool = False,
+        output_significances: list[int] | None = None,
         show_progress: bool = False,
         csv: str | None = None,
     ):
@@ -207,6 +219,9 @@ def __init__(
 
         self.max_depth = _validate_max_depth(max_depth, self.method)
         self.one_tree_per_output = one_tree_per_output
+        self.output_significances = _validate_output_significances(
+            self.circuit, output_significances
+        )
         self.show_progress = show_progress
         self.csv = csv
 
@@ -369,7 +384,7 @@ def _validate_max_depth(
     """
     if method == AlsMethod.DECISION_TREE:
         if max_depth is None:
-            raise ValueError(f"'max_depth' is required for method f{method}.")
+            raise ValueError(f"'max_depth' is required for method {method}.")
         else:
             if max_depth <= 1:
                 raise ValueError("max_depth must be > 1")
@@ -393,3 +408,23 @@ def _validate_validation(validation: float | None) -> float | None:
             )
 
     return validation
+
+
+def _validate_output_significances(
+    circuit: Circuit,
+    output_significances: list[int] | None,
+) -> list[int] | None:
+    """
+    Validates 'output_significances'.
+
+    Ensures that, if provided, its length matches the number of circuit outputs.
+
+    Raises ValueError if mismatched.
+    """
+    if output_significances is not None:
+        if len(output_significances) != len(circuit.outputs):
+            raise ValueError(
+                f"'output_significances' length ({len(output_significances)}) does not match the amount of circuit outputs ({len(circuit.outputs)})."
+            )
+
+    return output_significances
diff --git a/runner.py b/runner.py
index 140b85c..6fd983c 100644
--- a/runner.py
+++ b/runner.py
@@ -301,7 +301,7 @@ def _run_probprun(config: ApproxSynthesisConfig) -> Circuit:
     # existing nodes. We don't re-simulate and re-generate the SAIF because the
     # python method to regenerate the SAIF takes really long even for small
     # datasets. (Example: BK_16b, 4000 inputs, takes ~30 seconds to generate
-    # SAIF) But, if we find a way to generate the SAIF file quickly, for example
+    # SAIF.) But, if we find a way to generate the SAIF file quickly, for example
     # using a faster language for it, we might want to regenerate it on every
     # iteration or every N iterations.
 
@@ -361,10 +361,12 @@ def _run_significance(config: ApproxSynthesisConfig) -> Circuit:
     iteration = 0
     max_iters = config.max_iters if config.max_iters else float("inf")
 
-    # TODO: Allow specifying the output significances in the config
-    output_significances = []
+    if config.output_significances is not None:
+        output_significances = config.output_significances
+    else:
+        output_significances = []
 
-    for node, significance in GetbySignificance(circuit_root):
+    for node, significance in GetbySignificance(circuit_root, output_significances):
         if iteration >= max_iters:
             break
 
@@ -374,9 +376,7 @@ def _run_significance(config: ApproxSynthesisConfig) -> Circuit:
             f"Node {node} suggested by GetbySignificance should be findable in the circuit"
         )
 
-        print(
-            f"Pruning node {node} because its significance is {significance}"
-        )
+        print(f"Pruning node {node} because its significance is {significance}")
         node_to_delete.set("delete", "yes")
 
         if config.resynthesis:

From bd94f3ad1031421b39832fdbba36f2fde94dc411 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 15 May 2025 18:15:34 -0600
Subject: [PATCH 38/65] Add shuffle bag non-repeating distribution

---
 circuit.py | 15 +++++++++++++++
 utils.py   | 17 +++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/circuit.py b/circuit.py
index fe8dfca..f8eea1a 100644
--- a/circuit.py
+++ b/circuit.py
@@ -628,8 +628,23 @@ def generate_dataset(self, filename, samples, distribution='uniform', **kwargs):
                 "gaussian" or "normal" for a normal distribution.
                 "uniform" or "rectangular" for a uniform distribution.
                 "triangular" for a triangular distribution.
+                "shuffle_bag": It's "uniform-like", but avoids repeating values
+                               until the full dataset has been used, employing a
+                               shuffle bag algorithm.
                 TODO: Add more distributions
 
+            shuffle_bag WARNING ⚠️:
+            -----------------------
+                This mode generates a complete list of all possible input
+                combinations in memory, then shuffles and samples from it. It
+                guarantees no repeats, but is very memory intensive.
+
+                For circuits with:
+                  - 32 inputs: needs ~4.3 billion entries (~137GB RAM)
+                  - 16 inputs: only ~65,536 entries (~2MB RAM)
+
+                Use only for small circuits (preferably under 16 inputs).
+
         **kwargs: (optional)
 
         median: int
diff --git a/utils.py b/utils.py
index 09b145f..61d50e5 100644
--- a/utils.py
+++ b/utils.py
@@ -28,6 +28,10 @@ def get_random(bits: int, distribution='uniform', samples=1, **kwargs):
             "gaussian" or "normal" for a normal distribution.
             "uniform" or "rectangular" for a uniform distribution.
             "triangular" for a triangular distribution.
+            "shuffle_bag": Uniform, non-repeating values using a shuffle bag
+                           algorithm. Should not be used for a lot of input bits,
+                           see `Circuit.generate_dataset` docs for a detailed
+                           explanation why.
             TODO: Add more distributions
     samples: int
         Number of samples.
@@ -75,6 +79,19 @@ def get_random(bits: int, distribution='uniform', samples=1, **kwargs):
             random_value=int(math.floor(gauss(median,variance)))
             if low_limit<=random_value<=high_limit:
                 data.append(random_value)
+    elif distribution == 'shuffle_bag':
+        range_size = high_limit - low_limit
+        num_cycles = math.ceil(samples / range_size)
+
+        for i in range(num_cycles):
+            bag = list(range(low_limit, high_limit))
+            random.shuffle(bag)
+
+            samples_remaining = samples - len(data)
+            if samples_remaining > range_size:
+                data.extend(bag)
+            else:
+                data.extend(bag[0:samples_remaining])
     else:
         raise ValueError(f'{distribution} is not a valid distribution name')
 

From de53e97243b8de90ae54f45846f237b443736295 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 15 May 2025 19:28:47 -0600
Subject: [PATCH 39/65] Fix shuffle bag implementation creation of rows

---
 circuit.py | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/circuit.py b/circuit.py
index f8eea1a..6058ed5 100644
--- a/circuit.py
+++ b/circuit.py
@@ -679,13 +679,30 @@ def generate_dataset(self, filename, samples, distribution='uniform', **kwargs):
             else:
                 inputs_info[name]=1
 
+        format=f'0{bitwidth}b' if format=='b' else format #ensure right number of bits if binary
+
         '''Iterate inputs'''
+        if distribution == "shuffle_bag":
+            # Shuffle bag needs to generate all the inputs together, to avoid
+            # repeating the circuit's inputs as a whole.
+            total_bits = sum(inputs_info.values())
+            inputs = get_random(total_bits, distribution, samples, **kwargs)
+            for input in inputs:
+                shift_right = total_bits
+                row = []
+                for bitwidth in inputs_info.values():
+                    shift_right -= bitwidth
+                    mask = (1<< bitwidth)-1
+                    value = (input >> shift_right) & mask
+                    row.append(f'{value:{format}}')
+                data.append(row)
+
+        else:
+            for bitwidth in inputs_info.values():
+                rows=get_random(bitwidth,distribution,samples, **kwargs)
+                data.append([f'{i:{format}}' for i in rows])
+            data=list(zip(*data)) # Transpose data see: https://stackoverflow.com/questions/10169919/python-matrix-transpose-and-zip
 
-        for bitwidth in inputs_info.values():
-            rows=get_random(bitwidth,distribution,samples, **kwargs)
-            format=f'0{bitwidth}b' if format=='b' else format #ensure right number of bits if binary
-            data.append([f'{i:{format}}' for i in rows])
-        data=list(zip(*data)) # Transpose data see: https://stackoverflow.com/questions/10169919/python-matrix-transpose-and-zip
         np.savetxt(filename,data,fmt='%s')
 
         return

From 3d454ccd0a1d8284677958b2419150b93e5fd2e6 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 15 May 2025 19:47:04 -0600
Subject: [PATCH 40/65] Add --distribution flag to generate subcommand

---
 __main__.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/__main__.py b/__main__.py
index c90b297..62928dc 100644
--- a/__main__.py
+++ b/__main__.py
@@ -99,7 +99,6 @@ def main():
             for metric, value in validation_results.items():
                 print(f"{metric.value}: {metric.to_user_friendly_display(value)}")
 
-
     elif args.subcommand == "generate":
         generate_dataset(args)
 
@@ -181,6 +180,12 @@ def generate_arguments(generate_parser):
     """
     generate_parser.add_argument("circuit", help="Verilog circuit file.")
     generate_parser.add_argument("dataset", help="Dataset file to generate.")
+    generate_parser.add_argument(
+        "--distribution",
+        default="uniform",
+        choices=["gaussian", "uniform", "triangular", "shuffle_bag"],
+        help=".",
+    )
     generate_parser.add_argument(
         "size",
         type=parse_generate,
@@ -210,7 +215,7 @@ def generate_dataset(args: argparse.Namespace):
         max_inputs = 2 ** (len(circuit.inputs))
         size = round(max_inputs * size)
 
-    circuit.generate_dataset(args.dataset, size)
+    circuit.generate_dataset(args.dataset, size, args.distribution)
 
 
 if __name__ == "__main__":

From 5771625b363695bc55638b2fcad433c3234cb941 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 15 May 2025 19:49:17 -0600
Subject: [PATCH 41/65] Add ccarving

---
 runner.py | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 59 insertions(+), 2 deletions(-)

diff --git a/runner.py b/runner.py
index 6fd983c..5849027 100644
--- a/runner.py
+++ b/runner.py
@@ -7,7 +7,8 @@
 from circuit import Circuit
 from circuiterror import compute_error
 from ml_algorithms.decision_tree import DecisionTreeCircuit
-from pruning_algorithms.glpsignificance import GetbySignificance
+from pruning_algorithms.ccarving import FindCut
+from pruning_algorithms.glpsignificance import GetbySignificance, LabelCircuit
 from pruning_algorithms.inouts import GetInputs, GetOutputs
 from pruning_algorithms.probprun import GetOneNode
 from utils import read_dataset
@@ -400,7 +401,63 @@ def _run_significance(config: ApproxSynthesisConfig) -> Circuit:
 
 
 def _run_ccarving(config: ApproxSynthesisConfig) -> Circuit:
-    return config.circuit  # TODO Implement method
+    circuit = config.circuit
+    circuit_root = circuit.netl_root
+
+    assert config.error is not None, (
+        f"'error' should be given when executing {config.method}"
+    )
+
+    iteration = 0
+    max_iters = config.max_iters if config.max_iters else float("inf")
+
+    if config.output_significances is not None:
+        output_significances = config.output_significances
+    else:
+        output_significances = []
+
+    # TODO: Allow specifying the diff threshold in the config
+    diff_threshold = 2 ** (len(circuit.outputs)) - 1
+
+    # TODO: NEED to add harshness_level to the config.
+    harshness_level = 1
+
+    # Currently (May 2025) significance is the only relevant difference metric
+    # that we have available. If this changes in the future, 'diff' could be
+    # added as a config parameter.
+    diff = "significance"
+
+    LabelCircuit(circuit_root, output_significances)
+
+    while iteration < max_iters:
+        print("Finding cuts...", flush=True)
+
+        cuts = FindCut(circuit_root, diff_threshold, diff, harshness_level)
+
+        nodes_to_delete = cuts[0]
+        nodes_to_delete_names = [n.attrib["var"] for n in nodes_to_delete]
+
+        print(f"Pruning nodes {nodes_to_delete_names} as a single cut...\n")
+        [n.set("delete", "yes") for n in nodes_to_delete]
+
+        if config.resynthesis:
+            circuit.resynth()
+
+        error = circuit.simulate_and_compute_error(
+            TB, EXACT_OUTPUT, TEMP_OUTPUT, Metric.MEAN_RELATIVE_ERROR_DISTANCE
+        )
+
+        print(f"Pruned circuit error: {error}")
+
+        if iteration > 0 and error > config.error:
+            print("Error has overpassed threshold, undoing last prune\n")
+            [n.set("delete", "no") for n in nodes_to_delete]
+            break
+
+        iteration += 1
+        os.replace(TEMP_OUTPUT, APPROX_OUTPUT)
+
+    return circuit
 
 
 def _compute_error_metrics(

From 8e0d4bd926f84e54494d071abe72217a2f801e26 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Fri, 16 May 2025 13:40:30 -0600
Subject: [PATCH 42/65] Fix uniform generation of big values

---
 circuit.py |  4 ++++
 utils.py   | 10 ++++++++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/circuit.py b/circuit.py
index 6058ed5..e7b3757 100644
--- a/circuit.py
+++ b/circuit.py
@@ -643,6 +643,10 @@ def generate_dataset(self, filename, samples, distribution='uniform', **kwargs):
                   - 32 inputs: needs ~4.3 billion entries (~137GB RAM)
                   - 16 inputs: only ~65,536 entries (~2MB RAM)
 
+                This is regardless of how many samples you're actually
+                grabbing! Even if you only grab 1 sample the full dataset will
+                be instantiated.
+
                 Use only for small circuits (preferably under 16 inputs).
 
         **kwargs: (optional)
diff --git a/utils.py b/utils.py
index 61d50e5..37bdcd1 100644
--- a/utils.py
+++ b/utils.py
@@ -5,7 +5,7 @@
 from typing import List
 import numpy as np
 import math
-from random import uniform, gauss, triangular
+from random import randrange, gauss, triangular
 
 def get_name(length):
     timestamp = datetime.now().strftime("%H%M%S")
@@ -71,7 +71,13 @@ def get_random(bits: int, distribution='uniform', samples=1, **kwargs):
     '''Distributions case'''
     data=[]
     if distribution in {'uniform', 'rectangular'}:
-        data=(int(math.floor(uniform(low_limit,high_limit))) for _ in range(samples))
+        data=(randrange(low_limit, high_limit) for _ in range(samples))
+
+    # TODO: There's an issue with the `triangular` and `gauss` methods, which is
+    # that due to returning floats, they generate values where only around 50
+    # MSBs have a non-zero value, which makes them unsuitable for larger
+    # circuits that can have 64, 128, or even more input bits.
+
     elif distribution=='triangular':
         data=(int(math.floor(triangular(low_limit,high_limit,mode=median))) for _ in range(samples))
     elif distribution in {'normal', 'gaussian'}:

From ca2f1693abf0c5bb1cde92f06a95adfef6357a31 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Sun, 18 May 2025 17:06:03 -0600
Subject: [PATCH 43/65] Fix resynthesis variable substitution to add quotes
 around paths

---
 synthesis.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/synthesis.py b/synthesis.py
index 96af95b..8491d59 100644
--- a/synthesis.py
+++ b/synthesis.py
@@ -77,12 +77,12 @@ def resynthesis(netlist, tech, topmodule):
 
     netlist_path = os.path.dirname(netlist) + "/netlist.v"
 
-    file_text = file_text.replace("[[RTLFILENAME]]", netlist)
+    file_text = file_text.replace("[[RTLFILENAME]]", f'"{netlist}"')
     file_text = file_text.replace("[[TOPMODULE]]", topmodule)
-    file_text = file_text.replace("[[TECHNOLOGY]]", f'{current_dir}/templates/{tech}.v')
-    file_text = file_text.replace("[[NETLIST]]", netlist_path)
-    file_text = file_text.replace("[[LIBRARY]]", f"{current_dir}/templates/{tech}.lib")
-    file_text = file_text.replace("[[LIBRARYABC]]", f"{current_dir}/templates/{tech}.lib")
+    file_text = file_text.replace("[[TECHNOLOGY]]", f'"{current_dir}/templates/{tech}.v"')
+    file_text = file_text.replace("[[NETLIST]]", f'"{netlist_path}"')
+    file_text = file_text.replace("[[LIBRARY]]", f'"{current_dir}/templates/{tech}.lib"')
+    file_text = file_text.replace("[[LIBRARYABC]]", f'"{current_dir}/templates/{tech}.lib"')
 
     file = open('resynth.ys',"w")
     file.write(file_text)

From 9c204936bd6b330c2906f25273edee7129f37937 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Sun, 18 May 2025 17:06:52 -0600
Subject: [PATCH 44/65] Add extra print info to runner

---
 runner.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/runner.py b/runner.py
index 5849027..34a5e0c 100644
--- a/runner.py
+++ b/runner.py
@@ -190,6 +190,7 @@ def _run_decision_tree(config: ApproxSynthesisConfig) -> Circuit:
 
     tree.train(inputs, outputs)
     tree.to_verilog_file(exact_circuit.topmodule, APPROX_RTL)
+    print("Synthesizing circuit from trained decision tree, this might take a while...")
     tree_circuit = Circuit(
         APPROX_RTL, exact_circuit.tech_file, topmodule=exact_circuit.topmodule
     )
@@ -272,7 +273,7 @@ def _run_constant_inputs_outputs(
 
         node_to_delete = deletable_nodes.pop(0)
 
-        print(f"Pruning node {node_to_delete.attrib['var']}")
+        print(f"Iteration {iteration+1}: Pruning node {node_to_delete.attrib['var']}")
         node_to_delete.set("delete", "yes")
 
         if config.resynthesis:
@@ -327,7 +328,7 @@ def _run_probprun(config: ApproxSynthesisConfig) -> Circuit:
         )
 
         print(
-            f"Pruning node {node_to_delete} because it's {output} {time_percent}% of the time"
+            f"Iteration {iteration+1}: Pruning node {node_to_delete} because it's {output} {time_percent}% of the time"
         )
         node_to_delete.set("delete", "yes")
 
@@ -377,7 +378,7 @@ def _run_significance(config: ApproxSynthesisConfig) -> Circuit:
             f"Node {node} suggested by GetbySignificance should be findable in the circuit"
         )
 
-        print(f"Pruning node {node} because its significance is {significance}")
+        print(f"Iteration {iteration+1}: Pruning node {node} because its significance is {significance}")
         node_to_delete.set("delete", "yes")
 
         if config.resynthesis:
@@ -437,7 +438,7 @@ def _run_ccarving(config: ApproxSynthesisConfig) -> Circuit:
         nodes_to_delete = cuts[0]
         nodes_to_delete_names = [n.attrib["var"] for n in nodes_to_delete]
 
-        print(f"Pruning nodes {nodes_to_delete_names} as a single cut...\n")
+        print(f"Iteration {iteration+1}: Pruning nodes {nodes_to_delete_names} as a single cut...\n")
         [n.set("delete", "yes") for n in nodes_to_delete]
 
         if config.resynthesis:

From 25b1d5d5b7e7a0c304d69ab2fce5689719cac246 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Sun, 18 May 2025 17:09:47 -0600
Subject: [PATCH 45/65] Fix simulation when running from a different dir

---
 circuit.py | 38 +++++++++++++++++---------------------
 1 file changed, 17 insertions(+), 21 deletions(-)

diff --git a/circuit.py b/circuit.py
index e7b3757..afd3009 100644
--- a/circuit.py
+++ b/circuit.py
@@ -502,23 +502,22 @@ def exact_output (self, testbench, output_file):
 
         top = self.topmodule
         current_dir=os.path.dirname(__file__)
-        tech = f"{current_dir}/templates/" + self.tech_file
+        tech = f"{current_dir}/templates/{self.tech_file}"
 
         # Executable is ran from the testbench folder, because the path to the
         # dataset is relative to the testbench file.
+        testbench = os.path.abspath(testbench)
         out = os.path.dirname(testbench)
 
-        """Better to temporarily change cwd when executing iverilog"""
-        cwd=os.getcwd()
-        os.chdir(current_dir)
-
         # - - - - - - - - - - - - - - - Execute icarus - - - - - - - - - - - - -
         # iverilog -l tech.v -o executable testbench.v netlist.v
-        kon = f"iverilog -l \"{tech}.v\" -o \"{out}/{top}\" {testbench} \"{rtl}\""
+        kon = f"iverilog -l \"{tech}.v\" -o \"{out}/{top}\" \"{testbench}\" \"{rtl}\""
         system(kon)
 
         # - - - - - - - - - - - - - Execute the testbench  - - - - - - - - - - -
-        system(f"cd \"{out}\"; ./{top}")
+        cwd=os.getcwd()
+        os.chdir(out)
+        system(f"./{top}")
 
         os.chdir(cwd)
 
@@ -527,11 +526,9 @@ def exact_output (self, testbench, output_file):
 
         rename(out + "/output.txt", output_file)
 
-        return
-
 
     def simulate(self, testbench, approximate_output):
-        '''
+        """
         Simulates the circuit tree with deletions.
         Creates an executable using icarus, end then execute it to obtain the
         output of the testbench
@@ -544,36 +541,35 @@ def simulate(self, testbench, approximate_output):
             Path to the output file where simulation results will be written.
             The user must provide the full file path and name. If the file
             exists, it will be overwritten.
-        '''
+        """
         rtl = f"{self.output_folder}/{get_name(5)}.v"
         self.write_to_disk(rtl)
 
         top = self.topmodule
-        tech = "./templates/" + self.tech_file
+        current_dir = os.path.dirname(__file__)
+        tech = f"{current_dir}/templates/{self.tech_file}"
 
         # Executable is ran from the testbench folder, because the path to the
         # dataset is relative to the testbench file.
+        testbench = os.path.abspath(testbench)
         out = os.path.dirname(testbench)
 
-        """Better to temporarily change cwd when executing iverilog"""
-        cwd=os.getcwd()
-        current_dir=os.path.dirname(__file__)
-        os.chdir(current_dir)
-
         # - - - - - - - - - - - - - - - Execute icarus - - - - - - - - - - - - -
         # iverilog -l tech.v -o executable testbench.v netlist.v
-        kon = f"iverilog -l \"{tech}.v\" -o \"{out}/{top}\" {testbench} \"{rtl}\""
+        kon = f'iverilog -l "{tech}.v" -o "{out}/{top}" "{testbench}" "{rtl}"'
         system(kon)
 
         # - - - - - - - - - - - - - Execute the testbench  - - - - - - - - - - -
-        system(f"cd \"{out}\"; ./{top}")
-        os.chdir(cwd)
+        cwd = os.getcwd()
+        os.chdir(out)
+        system(f"./{top}")
 
-        rename(out + "/output.txt", approximate_output)
+        os.chdir(cwd)
 
         remove(rtl)
         remove(f"{out}/{top}")
 
+        rename(out + "/output.txt", approximate_output)
 
     def simulate_and_compute_error (self, testbench, exact_output, new_output, metric):
         '''

From 7522303ee94571858fb8d68f2ac137799142a572 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Mon, 19 May 2025 15:00:07 -0600
Subject: [PATCH 46/65] Improve assign parsing and fix some minor stuff

---
 netlist.py | 69 +++++++++++++++++++++++++++++++++---------------------
 runner.py  |  3 +--
 2 files changed, 43 insertions(+), 29 deletions(-)

diff --git a/netlist.py b/netlist.py
index bf5e85b..55f1569 100644
--- a/netlist.py
+++ b/netlist.py
@@ -155,9 +155,8 @@ def to_xml(self):
 
         return root
 
-
     def get_inputs(self, netlist_rtl, raw_parameters):
-        '''
+        """
         Extracts the circuit's input variables.
 
         Inputs are returned in two ways:
@@ -181,21 +180,22 @@ def get_inputs(self, netlist_rtl, raw_parameters):
         -------
         tuple[list[str], list[str]]
             raw_inputs and circuit_inputs
-        '''
+        """
         raw_inputs = []
         circuit_inputs = []
         inputs = re.findall(
-            r'input\s*(\[([0-9]*):([0-9]*)\])*\s*([a-zA-Z0-9]*)',netlist_rtl)
+            r"input\s*(\[([0-9]*):([0-9]*)\])*\s*([a-zA-Z0-9]*)", netlist_rtl
+        )
         for i in inputs:
-            if i[0] != '':
+            if i[0] != "":
                 left = int(i[1])
                 right = int(i[2])
-                if (left > right):
-                    for x in range(left, right-1, -1):
-                        circuit_inputs.append(i[3]+'['+str(x)+']')
+                if left > right:
+                    for x in range(left, right - 1, -1):
+                        circuit_inputs.append(i[3] + "[" + str(x) + "]")
                 else:
-                    for x in range(left,right+1):
-                        circuit_inputs.append(i[3]+'['+str(x)+']')
+                    for x in range(left, right + 1):
+                        circuit_inputs.append(i[3] + "[" + str(x) + "]")
                 raw_inputs.append(f"input [{i[1]}:{i[2]}] {i[3]};")
             else:
                 circuit_inputs.append(f"{i[3]}")
@@ -205,9 +205,8 @@ def get_inputs(self, netlist_rtl, raw_parameters):
         raw_inputs = sort_raw_vars(raw_inputs, raw_parameters)
         return raw_inputs, circuit_inputs
 
-
     def get_outputs(self, netlist_rtl, raw_parameters):
-        '''
+        """
         Extracts the circuit's output variables.
 
         Outputs are returned in two ways:
@@ -232,20 +231,21 @@ def get_outputs(self, netlist_rtl, raw_parameters):
         -------
         tuple[list[str], list[str]]
             raw_outputs and circuit_outputs
-        '''
+        """
         raw_outputs = []
         circuit_outputs = []
         outputs = re.findall(
-            r'output\s*(\[([0-9]*):([0-9]*)\])*\s*([a-zA-Z0-9]*)',netlist_rtl)
+            r"output\s*(\[([0-9]*):([0-9]*)\])*\s*([a-zA-Z0-9]*)", netlist_rtl
+        )
         for o in outputs:
-            if o[0] != '':
+            if o[0] != "":
                 left = int(o[1])
                 right = int(o[2])
-                if (left > right):
-                    for x in range(left, right-1, -1):
+                if left > right:
+                    for x in range(left, right - 1, -1):
                         circuit_outputs.append(f"{o[3]}[{str(x)}]")
                 else:
-                    for x in range(left,right+1):
+                    for x in range(left, right + 1):
                         circuit_outputs.append(f"{o[3]}[{str(x)}]")
                 raw_outputs.append(f"output [{o[1]}:{o[2]}] {o[3]};")
             else:
@@ -338,7 +338,8 @@ def expand_range(expr):
             base, range_part = expr.split("[")
             range_part = range_part[:-1]
             start, end = map(int, range_part.split(":"))
-            return [f"{base}[{i}]" for i in range(start, end - 1, -1)]
+            step = -1 if start > end else 1
+            return [f"{base}[{i}]" for i in range(start, end + step, step)]
         else:
             return [expr]
     else:
@@ -391,6 +392,17 @@ def parse_assigns(content):
         - Ports mapped to wires by Yosys
         - Constant assignments in resynth
 
+    If the LHS is a full variable and the RHS is a concatenation or range with
+    multiple bits, the LHS is automatically expanded to match the RHS
+    bit width. For example:
+
+        assign out = { a[1], b[0] }
+
+    ...will produce:
+
+        [('out[1]', 'a[1]'),
+         ('out[0]', 'b[0]')]
+
     Parameters
     ----------
     content : string
@@ -408,20 +420,15 @@ def parse_assigns(content):
         ... assign foo[1:0] = bar[3:2];
         ... assign x = 0;
         ... assign { out[4:3], out[0:1] } = { in1[3], in2[1:0], in3[2] };
+        ... assign out = { in1[0:1], in2[0:1] };
         ... """
         >>> parse_assigns(code)
         [('a[2]', 'b[2]'),
          ('foo[1]', 'bar[3]'), ('foo[0]', 'bar[2]'),
          ('x', '0'),
-         ('out[4]', 'in1[3]'), ('out[3]', 'in2[1]'), ('out[0]', 'in2[0]'), ('out[1]', 'in3[2]')
+         ('out[4]', 'in1[3]'), ('out[3]', 'in2[1]'), ('out[0]', 'in2[0]'), ('out[1]', 'in3[2]'),
+         ('out[3]', 'in1[0]'), ('out[2]', 'in1[1]'), ('out[1]', 'in2[0]'), ('out[0]', 'in2[1]')
         ]
-
-    TODO: This method can't handle range or concatenated assignments to full
-    variables. For example in the following case it will cause a "Bit width
-    mismatch" error even if `out` is a 4 bit variable, because it doesn't know
-    that:
-
-        assign out = { in1[0:1], in2[0:1] }
     '''
     expreg = r"assign\s+(.*?)\s*=\s*(.*?);"
     assigns = re.findall(expreg, content)
@@ -429,8 +436,16 @@ def parse_assigns(content):
     for lhs, rhs in assigns:
         lhs_bits = expand_concat(lhs)
         rhs_bits = expand_concat(rhs)
+
+        # if LHS is a single bare name but RHS is wide, expand LHS to match
+        if len(lhs_bits) == 1 and lhs_bits[0] == lhs and len(rhs_bits) > 1:
+            width = len(rhs_bits)
+            # msb = width-1 down to 0
+            lhs_bits = [f"{lhs}[{i}]" for i in range(width - 1, -1, -1)]
+
         if len(lhs_bits) != len(rhs_bits):
             raise ValueError(f"Bit width mismatch: LHS {lhs_bits} != RHS {rhs_bits}")
+
         result.extend(zip(lhs_bits, rhs_bits))
     return result
 
diff --git a/runner.py b/runner.py
index 34a5e0c..8e599ea 100644
--- a/runner.py
+++ b/runner.py
@@ -18,7 +18,7 @@
 
 # Directory to output build files to.
 # TODO: Consider making this a config parameter.
-BUILD_DIR = "build"
+BUILD_DIR = f"{os.path.dirname(__file__)}/build"
 
 # Files generated by the different methods. Defined here to just reuse in the
 # code.
@@ -170,7 +170,6 @@ def _write_results_to_csv(
         if not file_exists:
             writer.writerow(config.csv_columns())
 
-        print("VALIDATION RESULTS:", validation_results)
         writer.writerow(config.csv_values(results, validation_results))
 
 

From f6973f3e725d85762f65595ee39851fe15516b47 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Mon, 19 May 2025 15:36:47 -0600
Subject: [PATCH 47/65] Improve parsing of constants

---
 netlist.py | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/netlist.py b/netlist.py
index 55f1569..534516a 100644
--- a/netlist.py
+++ b/netlist.py
@@ -348,15 +348,16 @@ def expand_range(expr):
 
 def expand_constant(expr):
     """
-    Expands a Verilog-style constant variable expression into a flat list of
-    individual bits.
+    Expands a Verilog-style constant into a flat list of individual bits.
 
     Parameters
     ----------
     expr : string
-        A Verilog constant like "3'h6". Currently only supports hexadecimal
-        expressions, but that should be enough since that's how yosys assigns
-        constants.
+        A Verilog constant like "3'h6" or "4'd13". Only hexadecimal ('h) and
+        decimal ('d) formats are supported.
+        TODO: Support other bases like binary ('b) or octal ('o). Support for
+        these hasn't been added because we haven't run into a scenario where
+        yosys assigns a constant with these bases.
 
     Returns
     -------
@@ -370,15 +371,23 @@ def expand_constant(expr):
 
         >>> expand_constant("4'hd")
         [1, 1, 0, 1]
+
+        >>> expand_constant("4'd13")
+        [1, 1, 0, 1]
     """
-    size, value = expr.split("'h")
+    size, rest = expr.split("'")
     size = int(size)
-    value = value.lower()
+    base = rest[0].lower()
+    value = rest[1:]
 
-    int_value = int(value, 16)
+    if base == "h":
+        int_value = int(value, 16)
+    elif base == "d":
+        int_value = int(value, 10)
+    else:
+        raise ValueError(f"Unsupported constant format: {expr}")
 
     bits = [(int_value >> i) & 1 for i in range(size - 1, -1, -1)]
-
     return bits
 
 

From 73a6ca8ce7b749ed431160d91590d35ca9e5e585 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 22 May 2025 01:30:09 -0600
Subject: [PATCH 48/65] Adapt inout and probprun methods to work with batches

---
 __main__.py      |  19 ++++++
 configuration.py |  23 ++++++-
 runner.py        | 162 +++++++++++++++++++++++++++++++++--------------
 3 files changed, 155 insertions(+), 49 deletions(-)

diff --git a/__main__.py b/__main__.py
index 62928dc..b5e2edb 100644
--- a/__main__.py
+++ b/__main__.py
@@ -28,6 +28,18 @@ def parse_generate(value):
                 f"Invalid generate_dataset value: {value}. Must be int or float."
             )
 
+def parse_positive_integer(value):
+    """Argparse ``type=`` callable: parse `value` as an integer >= 1."""
+    try:
+        n = int(value)
+    except ValueError:
+        message = f"Invalid value: {value}. Must be a positive integer."
+        raise argparse.ArgumentTypeError(message) from None
+    if n < 1:
+        raise argparse.ArgumentTypeError(f"Invalid value: {value}. Should be a positive integer, x >= 1.")
+    return n
+
+
 
 def main():
     parser = argparse.ArgumentParser(
@@ -76,6 +88,7 @@ def main():
                 error=args.error,
                 validation=args.validation,
                 max_iters=args.max_iters,
+                prunes_per_iteration=args.prunes_per_iteration,
                 max_depth=args.max_depth,
                 one_tree_per_output=args.one_tree_per_output,
                 show_progress=args.show_progress,
@@ -145,6 +158,12 @@ def run_arguments(run_parser):
         type=int,
         help="Maximum number of iterations for iterative methods.",
     )
+    run_parser.add_argument(
+        "--prunes-per-iteration",
+        type=parse_positive_integer,
+        default=1,
+        help="Number of prunes carried out each iteration. Affects pruning methods except ccarving since it already prunes multiple nodes at a time.",
+    )
     run_parser.add_argument(
         "--max-depth", type=int, help="Max depth for decision_tree method"
     )
diff --git a/configuration.py b/configuration.py
index cc7fa61..1d77c67 100644
--- a/configuration.py
+++ b/configuration.py
@@ -98,7 +98,7 @@ class ApproxSynthesisConfig:
 
     error : float (0 < x <= 1), optional
         The maximum error threshold permitted. Required for iterative methods,
-        like pruning methods or ML methods with resynthesis.
+        i.e. pruning methods.
 
         The error used is the Mean Relative Error Distance.
 
@@ -119,7 +119,16 @@ class ApproxSynthesisConfig:
 
     max_iters : int, optional
         Maximum amount of iterations to execute. Used in iterative methods,
-        like pruning methods or ML methods with resynthesis.
+        i.e. pruning methods.
+
+    prunes_per_iteration : int, default=1
+        Number of pruning operations to perform per iteration during ALS.
+        Increasing this value can speed up pruning-based methods by reducing the
+        number of iterations. It doesn't affect ccarving since it already prunes
+        multiple nodes per iteration. If the resulting circuit exceeds the error
+        threshold, the algorithm backtracks to the last valid state before
+        continuing. If this parameter is too large, backtracking may take longer
+        than the initial search, especially for small circuits.
 
     max_depth : int, optional
         Required for 'decision_tree'.
@@ -177,6 +186,7 @@ class ApproxSynthesisConfig:
     error: float | None
     validation: float | None
     max_iters: int | None
+    prunes_per_iteration: int
     max_depth: int | None
     one_tree_per_output: bool
     output_significances: list[int] | None
@@ -193,6 +203,7 @@ def __init__(
         error: float | None = None,
         validation: float | None = None,
         max_iters: int | None = None,
+        prunes_per_iteration: int = 1,
         max_depth: int | None = None,
         one_tree_per_output: bool = False,
         output_significances: list[int] | None = None,
@@ -216,6 +227,7 @@ def __init__(
         self.error = _validate_error(error, self.method)
         self.validation = _validate_validation(validation)
         self.max_iters = max_iters
+        self.prunes_per_iteration = _validate_prunes_per_iteration(prunes_per_iteration)
 
         self.max_depth = _validate_max_depth(max_depth, self.method)
         self.one_tree_per_output = one_tree_per_output
@@ -356,7 +368,6 @@ def _validate_error(
 
     Required for:
     - all iterative methods
-    - methods that become iterative with resynthesis, like decision_tree
 
     Raises ValueError if missing in those cases.
     """
@@ -428,3 +439,9 @@ def _validate_output_significances(
             )
 
     return output_significances
+
+
+def _validate_prunes_per_iteration(prunes_per_iteration: int) -> int:
+    if prunes_per_iteration < 1:
+        raise ValueError("prunes_per_iteration must be at least 1.")
+    return prunes_per_iteration
diff --git a/runner.py b/runner.py
index 8e599ea..110c491 100644
--- a/runner.py
+++ b/runner.py
@@ -245,35 +245,58 @@ def _run_constant_inputs_outputs(
     max_iters = config.max_iters if config.max_iters else float("inf")
 
     while iteration < max_iters:
-        const_variables = _get_lsbs_up_to(circuit_variables, max_const_bit)
         deletable_nodes: list[ElementTree.Element]
-
-        match inputs_or_outputs:
-            case "inputs":
-                deletable_nodes = GetInputs(circuit.netl_root, const_variables)
-            case "outputs":
-                deletable_nodes = GetOutputs(circuit.netl_root, const_variables)
-            case _:
-                raise ValueError("Invalid call to _run_constant_inputs_outputs")
-
-        # Filter Already deleted nodes
-        deletable_nodes = [
-            node for node in deletable_nodes if node.get("delete") != "yes"
-        ]
-
-        if len(deletable_nodes) == 0:
-            if set(const_variables) == set(circuit_variables):
-                # All variables have been set as const and all elected nodes
-                # have been deleted
-                return circuit
-            else:
-                max_const_bit += 1
-                continue
-
-        node_to_delete = deletable_nodes.pop(0)
-
-        print(f"Iteration {iteration+1}: Pruning node {node_to_delete.attrib['var']}")
-        node_to_delete.set("delete", "yes")
+        nodes_to_delete: list[ElementTree.Element] = []
+
+        while len(nodes_to_delete) < config.prunes_per_iteration:
+            const_variables = _get_lsbs_up_to(circuit_variables, max_const_bit)
+
+            match inputs_or_outputs:
+                case "inputs":
+                    deletable_nodes = GetInputs(circuit.netl_root, const_variables)
+                case "outputs":
+                    deletable_nodes = GetOutputs(circuit.netl_root, const_variables)
+                case _:
+                    raise ValueError("Invalid call to _run_constant_inputs_outputs")
+
+            # Filter Already deleted nodes
+            deletable_nodes = [
+                node
+                for node in deletable_nodes
+                if (
+                    node.get("delete") != "yes"
+                    and node.attrib["var"]
+                    not in [
+                        node_to_delete.attrib["var"]
+                        for node_to_delete in nodes_to_delete
+                    ]
+                )
+            ]
+
+            if len(deletable_nodes) == 0:
+                if set(const_variables) == set(circuit_variables):
+                    # All variables have been set as const and all elected nodes
+                    # have been deleted. This mean we already deleted all
+                    # possible nodes that could be deleted.
+                    if len(nodes_to_delete) != 0:
+                        # There might be some nodes to delete from a previous
+                        # iteration of this loop, so finish deleting those.
+                        break
+                    else:
+                        # Nothing left to do, finish execution.
+                        return circuit
+                else:
+                    max_const_bit += 1
+                    continue
+
+            max_nodes_to_append = config.prunes_per_iteration - len(nodes_to_delete)
+            nodes_to_delete.extend(deletable_nodes[:max_nodes_to_append])
+
+        nodes_to_delete_names = [node.attrib["var"] for node in nodes_to_delete]
+        print(f"Iteration {iteration + 1}: Pruning nodes {nodes_to_delete_names}")
+
+        for node in nodes_to_delete:
+            node.set("delete", "yes")
 
         if config.resynthesis:
             circuit.resynth()
@@ -284,9 +307,10 @@ def _run_constant_inputs_outputs(
 
         print(f"Pruned circuit error: {error}")
 
-        if iteration > 0 and error > config.error:
-            print("Error has overpassed threshold, undoing last prune\n")
-            node_to_delete.set("delete", "no")
+        if error > config.error:
+            print("Error has overpassed threshold, backtracking...\n")
+            _undo_prunes(circuit, nodes_to_delete, config.error)
+            os.replace(TEMP_OUTPUT, APPROX_OUTPUT)
             break
 
         iteration += 1
@@ -316,20 +340,37 @@ def _run_probprun(config: ApproxSynthesisConfig) -> Circuit:
     iteration = 0
     max_iters = config.max_iters if config.max_iters else float("inf")
 
-    for node, output, time_percent in GetOneNode(circuit_root):
-        if iteration >= max_iters:
-            break
+    probprun = GetOneNode(circuit_root)
+    while iteration < max_iters:
+        nodes_to_delete = []
+        nodes_info = []
 
-        node_to_delete = circuit_root.find(f"./node[@var='{node}']")
+        for (node, output, time_percent), _ in zip(
+            probprun, range(config.prunes_per_iteration)
+        ):
+            node_to_delete = circuit_root.find(f"./node[@var='{node}']")
 
-        assert node_to_delete is not None, (
-            f"Node {node} suggested by ProbPrun should be findable in the circuit"
-        )
+            assert node_to_delete is not None, (
+                f"Node {node} suggested by ProbPrun should be findable in the circuit"
+            )
+
+            nodes_to_delete.append(node_to_delete)
+            nodes_info.append((output, time_percent))
+
+        if len(nodes_to_delete) == 0:
+            # If no nodes were appended it means there's no nodes left to delete
+            return circuit
+
+        nodes_to_delete_names = [node.attrib["var"] for node in nodes_to_delete]
 
         print(
-            f"Iteration {iteration+1}: Pruning node {node_to_delete} because it's {output} {time_percent}% of the time"
+            f"Iteration {iteration + 1}: Pruning nodes {nodes_to_delete_names} because:"
         )
-        node_to_delete.set("delete", "yes")
+        for node, (output, time_percent) in zip(nodes_to_delete_names, nodes_info):
+            print(f"{node} is {output} {time_percent}% of the time")
+
+        for node in nodes_to_delete:
+            node.set("delete", "yes")
 
         if config.resynthesis:
             circuit.resynth()
@@ -340,9 +381,10 @@ def _run_probprun(config: ApproxSynthesisConfig) -> Circuit:
 
         print(f"Pruned circuit error: {error}")
 
-        if iteration > 0 and error > config.error:
-            print("Error has overpassed threshold, undoing last prune\n")
-            node_to_delete.set("delete", "no")
+        if error > config.error:
+            print("Error has overpassed threshold, backtracking...\n")
+            _undo_prunes(circuit, nodes_to_delete, config.error)
+            os.replace(TEMP_OUTPUT, APPROX_OUTPUT)
             break
 
         iteration += 1
@@ -377,7 +419,9 @@ def _run_significance(config: ApproxSynthesisConfig) -> Circuit:
             f"Node {node} suggested by GetbySignificance should be findable in the circuit"
         )
 
-        print(f"Iteration {iteration+1}: Pruning node {node} because its significance is {significance}")
+        print(
+            f"Iteration {iteration + 1}: Pruning node {node} because its significance is {significance}"
+        )
         node_to_delete.set("delete", "yes")
 
         if config.resynthesis:
@@ -389,7 +433,7 @@ def _run_significance(config: ApproxSynthesisConfig) -> Circuit:
 
         print(f"Pruned circuit error: {error}")
 
-        if iteration > 0 and error > config.error:
+        if error > config.error:
             print("Error has overpassed threshold, undoing last prune\n")
             node_to_delete.set("delete", "no")
             break
@@ -437,7 +481,9 @@ def _run_ccarving(config: ApproxSynthesisConfig) -> Circuit:
         nodes_to_delete = cuts[0]
         nodes_to_delete_names = [n.attrib["var"] for n in nodes_to_delete]
 
-        print(f"Iteration {iteration+1}: Pruning nodes {nodes_to_delete_names} as a single cut...\n")
+        print(
+            f"Iteration {iteration + 1}: Pruning nodes {nodes_to_delete_names} as a single cut...\n"
+        )
         [n.set("delete", "yes") for n in nodes_to_delete]
 
         if config.resynthesis:
@@ -578,3 +624,27 @@ def _copy_last_n_lines(input_file: str, output_file: str, n: int) -> None:
 
     with open(output_file, "w") as outfile:
         outfile.writelines(last_n_lines)
+
+
+def _undo_prunes(
+    circuit, deleted_nodes: list[ElementTree.Element], error_threshold: float
+):
+    """
+    Will set the deleted_nodes "delete" propert to "no". Then simulates the
+    circuit and if the error is less than the error_threshold it returns.
+    Meant for backtracking the last iteration of prunes when the error threshold
+    is surpassed.
+    """
+    for node in reversed(deleted_nodes):
+        print(f"Undoing prune on node {node.attrib['var']}")
+        node.set("delete", "no")
+        error = circuit.simulate_and_compute_error(
+            TB, EXACT_OUTPUT, TEMP_OUTPUT, Metric.MEAN_RELATIVE_ERROR_DISTANCE
+        )
+        print(f"New error: {error}")
+        if error < error_threshold:
+            print("Error back to being under threshold, backtracking finished")
+            return
+
+    print("Reverted all prunes.")
+    return

From d01ce5f59834ce985c83032aaaa58c8b2911f7d9 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 22 May 2025 02:34:28 -0600
Subject: [PATCH 49/65] Fix backtracking when resynthesizing

---
 runner.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/runner.py b/runner.py
index 110c491..4c9ceee 100644
--- a/runner.py
+++ b/runner.py
@@ -373,20 +373,27 @@ def _run_probprun(config: ApproxSynthesisConfig) -> Circuit:
             node.set("delete", "yes")
 
         if config.resynthesis:
-            circuit.resynth()
-
-        error = circuit.simulate_and_compute_error(
-            TB, EXACT_OUTPUT, TEMP_OUTPUT, Metric.MEAN_RELATIVE_ERROR_DISTANCE
-        )
+            resynth_circuit = copy.copy(circuit)
+            resynth_circuit.resynth()
+            error = resynth_circuit.simulate_and_compute_error(
+                TB, EXACT_OUTPUT, TEMP_OUTPUT, Metric.MEAN_RELATIVE_ERROR_DISTANCE
+            )
+        else:
+            error = circuit.simulate_and_compute_error(
+                TB, EXACT_OUTPUT, TEMP_OUTPUT, Metric.MEAN_RELATIVE_ERROR_DISTANCE
+            )
 
         print(f"Pruned circuit error: {error}")
 
         if error > config.error:
             print("Error has overpassed threshold, backtracking...\n")
-            _undo_prunes(circuit, nodes_to_delete, config.error)
+            _undo_prunes(circuit, nodes_to_delete, config.error, config.resynthesis)
             os.replace(TEMP_OUTPUT, APPROX_OUTPUT)
             break
 
+        if config.resynthesis:
+            circuit = resynth_circuit
+
         iteration += 1
         os.replace(TEMP_OUTPUT, APPROX_OUTPUT)
 

From a54c178683733f498d87d1a0224eb2411b89bf8c Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 22 May 2025 03:03:27 -0600
Subject: [PATCH 50/65] Fix inout const methods to work more efficiently and
 fix pruning methods that resynthesize to properly handle the resynthesized
 circuit

---
 runner.py | 40 ++++++++++++++++++++++++++--------------
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/runner.py b/runner.py
index 4c9ceee..e75abc7 100644
--- a/runner.py
+++ b/runner.py
@@ -260,7 +260,7 @@ def _run_constant_inputs_outputs(
                     raise ValueError("Invalid call to _run_constant_inputs_outputs")
 
             # Filter Already deleted nodes
-            deletable_nodes = [
+            deletable_nodes_filtered = [
                 node
                 for node in deletable_nodes
                 if (
@@ -273,11 +273,15 @@ def _run_constant_inputs_outputs(
                 )
             ]
 
-            if len(deletable_nodes) == 0:
+            max_nodes_to_append = config.prunes_per_iteration - len(nodes_to_delete)
+            nodes_to_delete.extend(deletable_nodes_filtered[:max_nodes_to_append])
+
+            if len(nodes_to_delete) == len(deletable_nodes):
+                # If all deletable nodes are in nodes_to_delete, we can increase
+                # max_const_bit and go again or exit if all the eligible
+                # variables (i.e. all inputs or all outputs) have been tried out
+                # as const variables.
                 if set(const_variables) == set(circuit_variables):
-                    # All variables have been set as const and all elected nodes
-                    # have been deleted. This mean we already deleted all
-                    # possible nodes that could be deleted.
                     if len(nodes_to_delete) != 0:
                         # There might be some nodes to delete from a previous
                         # iteration of this loop, so finish deleting those.
@@ -286,12 +290,11 @@ def _run_constant_inputs_outputs(
                         # Nothing left to do, finish execution.
                         return circuit
                 else:
+                    # All deletable nodes were added to nodes_to_delete but
+                    # there's still more nodes to delete, increase max_const_bit
                     max_const_bit += 1
                     continue
 
-            max_nodes_to_append = config.prunes_per_iteration - len(nodes_to_delete)
-            nodes_to_delete.extend(deletable_nodes[:max_nodes_to_append])
-
         nodes_to_delete_names = [node.attrib["var"] for node in nodes_to_delete]
         print(f"Iteration {iteration + 1}: Pruning nodes {nodes_to_delete_names}")
 
@@ -299,19 +302,28 @@ def _run_constant_inputs_outputs(
             node.set("delete", "yes")
 
         if config.resynthesis:
-            circuit.resynth()
-
-        error = circuit.simulate_and_compute_error(
-            TB, EXACT_OUTPUT, TEMP_OUTPUT, Metric.MEAN_RELATIVE_ERROR_DISTANCE
-        )
+            resynth_circuit = copy.copy(circuit)
+            resynth_circuit.resynth()
+            error = resynth_circuit.simulate_and_compute_error(
+                TB, EXACT_OUTPUT, TEMP_OUTPUT, Metric.MEAN_RELATIVE_ERROR_DISTANCE
+            )
+        else:
+            error = circuit.simulate_and_compute_error(
+                TB, EXACT_OUTPUT, TEMP_OUTPUT, Metric.MEAN_RELATIVE_ERROR_DISTANCE
+            )
 
         print(f"Pruned circuit error: {error}")
 
         if error > config.error:
             print("Error has overpassed threshold, backtracking...\n")
-            _undo_prunes(circuit, nodes_to_delete, config.error)
+            circuit = _undo_prunes(
+                circuit, nodes_to_delete, config.error, config.resynthesis
+            )
             os.replace(TEMP_OUTPUT, APPROX_OUTPUT)
             break
+        else:
+            if config.resynthesis:
+                circuit = resynth_circuit
 
         iteration += 1
         os.replace(TEMP_OUTPUT, APPROX_OUTPUT)

From c2e3d3462ec4b67aaa11000712b400b8442ae301 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 22 May 2025 03:04:26 -0600
Subject: [PATCH 51/65] Fix handling of returned circuit for probprun

---
 runner.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/runner.py b/runner.py
index e75abc7..1d7230d 100644
--- a/runner.py
+++ b/runner.py
@@ -399,12 +399,14 @@ def _run_probprun(config: ApproxSynthesisConfig) -> Circuit:
 
         if error > config.error:
             print("Error has overpassed threshold, backtracking...\n")
-            _undo_prunes(circuit, nodes_to_delete, config.error, config.resynthesis)
+            circuit = _undo_prunes(
+                circuit, nodes_to_delete, config.error, config.resynthesis
+            )
             os.replace(TEMP_OUTPUT, APPROX_OUTPUT)
             break
-
-        if config.resynthesis:
-            circuit = resynth_circuit
+        else:
+            if config.resynthesis:
+                circuit = resynth_circuit
 
         iteration += 1
         os.replace(TEMP_OUTPUT, APPROX_OUTPUT)

From ab999220cdc2161b1c4953fd90a31a2e0e2cc7cb Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 22 May 2025 03:10:41 -0600
Subject: [PATCH 52/65] add prunes_per_iteration functionality to
 _run_significance

---
 runner.py | 53 +++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 37 insertions(+), 16 deletions(-)

diff --git a/runner.py b/runner.py
index 1d7230d..95930a9 100644
--- a/runner.py
+++ b/runner.py
@@ -430,34 +430,55 @@ def _run_significance(config: ApproxSynthesisConfig) -> Circuit:
     else:
         output_significances = []
 
-    for node, significance in GetbySignificance(circuit_root, output_significances):
-        if iteration >= max_iters:
-            break
+    nodes_sorted_by_significance = GetbySignificance(circuit_root, output_significances)
+    while iteration < max_iters:
+        nodes_to_delete = []
+        nodes_info = []
 
-        node_to_delete = circuit_root.find(f"./node[@var='{node}']")
+        for (node, significance), _ in zip(
+            nodes_sorted_by_significance, range(config.prunes_per_iteration)
+        ):
+            node_to_delete_ = circuit_root.find(f"./node[@var='{node}']")
 
-        assert node_to_delete is not None, (
-            f"Node {node} suggested by GetbySignificance should be findable in the circuit"
-        )
+            assert node_to_delete_ is not None, (
+                f"Node {node} suggested by GetbySignificance should be findable in the circuit"
+            )
+            nodes_to_delete.append(node_to_delete_)
+            nodes_info.append(significance)
 
+        nodes_to_delete_names = [node.attrib["var"] for node in nodes_to_delete]
         print(
-            f"Iteration {iteration + 1}: Pruning node {node} because its significance is {significance}"
+            f"Iteration {iteration + 1}: Pruning nodes {nodes_to_delete_names} because:"
         )
-        node_to_delete.set("delete", "yes")
+        for node, significance in zip(nodes_to_delete_names, nodes_info):
+            print(f"{node} has {significance} significance")
 
-        if config.resynthesis:
-            circuit.resynth()
+        for node in nodes_to_delete:
+            node.set("delete", "yes")
 
-        error = circuit.simulate_and_compute_error(
-            TB, EXACT_OUTPUT, TEMP_OUTPUT, Metric.MEAN_RELATIVE_ERROR_DISTANCE
-        )
+        if config.resynthesis:
+            resynth_circuit = copy.copy(circuit)
+            resynth_circuit.resynth()
+            error = resynth_circuit.simulate_and_compute_error(
+                TB, EXACT_OUTPUT, TEMP_OUTPUT, Metric.MEAN_RELATIVE_ERROR_DISTANCE
+            )
+        else:
+            error = circuit.simulate_and_compute_error(
+                TB, EXACT_OUTPUT, TEMP_OUTPUT, Metric.MEAN_RELATIVE_ERROR_DISTANCE
+            )
 
         print(f"Pruned circuit error: {error}")
 
         if error > config.error:
-            print("Error has overpassed threshold, undoing last prune\n")
-            node_to_delete.set("delete", "no")
+            print("Error has overpassed threshold, backtracking...\n")
+            circuit = _undo_prunes(
+                circuit, nodes_to_delete, config.error, config.resynthesis
+            )
+            os.replace(TEMP_OUTPUT, APPROX_OUTPUT)
             break
+        else:
+            if config.resynthesis:
+                circuit = resynth_circuit
 
         iteration += 1
         os.replace(TEMP_OUTPUT, APPROX_OUTPUT)

From 19eabb6c82840dacbde3b078620a00021bb07eb5 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Thu, 22 May 2025 03:24:08 -0600
Subject: [PATCH 53/65] Finish fixing behaviour when resynthesizing and
 implementing backtracking for ccarving.

---
 runner.py | 68 +++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 49 insertions(+), 19 deletions(-)

diff --git a/runner.py b/runner.py
index 95930a9..0b0aff3 100644
--- a/runner.py
+++ b/runner.py
@@ -1,6 +1,7 @@
 from collections import deque
 from collections.abc import Callable
 import csv
+import copy
 import os
 import time
 from xml.etree import ElementTree
@@ -488,7 +489,6 @@ def _run_significance(config: ApproxSynthesisConfig) -> Circuit:
 
 def _run_ccarving(config: ApproxSynthesisConfig) -> Circuit:
     circuit = config.circuit
-    circuit_root = circuit.netl_root
 
     assert config.error is not None, (
         f"'error' should be given when executing {config.method}"
@@ -513,12 +513,16 @@ def _run_ccarving(config: ApproxSynthesisConfig) -> Circuit:
     # as a config parameter.
     diff = "significance"
 
-    LabelCircuit(circuit_root, output_significances)
-
     while iteration < max_iters:
         print("Finding cuts...", flush=True)
+        circuit_root = circuit.netl_root
 
-        cuts = FindCut(circuit_root, diff_threshold, diff, harshness_level)
+        LabelCircuit(circuit_root, output_significances)
+        cuts = FindCut(circuit.netl_root, diff_threshold, diff, harshness_level)
+
+        if len(cuts) == 0:
+            print("No more cuts left to make")
+            return circuit
 
         nodes_to_delete = cuts[0]
         nodes_to_delete_names = [n.attrib["var"] for n in nodes_to_delete]
@@ -529,18 +533,28 @@ def _run_ccarving(config: ApproxSynthesisConfig) -> Circuit:
         [n.set("delete", "yes") for n in nodes_to_delete]
 
         if config.resynthesis:
-            circuit.resynth()
-
-        error = circuit.simulate_and_compute_error(
-            TB, EXACT_OUTPUT, TEMP_OUTPUT, Metric.MEAN_RELATIVE_ERROR_DISTANCE
-        )
+            resynth_circuit = copy.copy(circuit)
+            resynth_circuit.resynth()
+            error = resynth_circuit.simulate_and_compute_error(
+                TB, EXACT_OUTPUT, TEMP_OUTPUT, Metric.MEAN_RELATIVE_ERROR_DISTANCE
+            )
+        else:
+            error = circuit.simulate_and_compute_error(
+                TB, EXACT_OUTPUT, TEMP_OUTPUT, Metric.MEAN_RELATIVE_ERROR_DISTANCE
+            )
 
         print(f"Pruned circuit error: {error}")
 
-        if iteration > 0 and error > config.error:
-            print("Error has overpassed threshold, undoing last prune\n")
-            [n.set("delete", "no") for n in nodes_to_delete]
+        if error > config.error:
+            print("Error has overpassed threshold, backtracking...\n")
+            circuit = _undo_prunes(
+                circuit, nodes_to_delete, config.error, config.resynthesis
+            )
+            os.replace(TEMP_OUTPUT, APPROX_OUTPUT)
             break
+        else:
+            if config.resynthesis:
+                circuit = resynth_circuit
 
         iteration += 1
         os.replace(TEMP_OUTPUT, APPROX_OUTPUT)
@@ -669,8 +683,11 @@ def _copy_last_n_lines(input_file: str, output_file: str, n: int) -> None:
 
 
 def _undo_prunes(
-    circuit, deleted_nodes: list[ElementTree.Element], error_threshold: float
-):
+    circuit,
+    deleted_nodes: list[ElementTree.Element],
+    error_threshold: float,
+    resynthesis: bool,
+) -> Circuit:
     """
     Will set the deleted_nodes "delete" propert to "no". Then simulates the
     circuit and if the error is less than the error_threshold it returns.
@@ -680,13 +697,26 @@ def _undo_prunes(
     for node in reversed(deleted_nodes):
         print(f"Undoing prune on node {node.attrib['var']}")
         node.set("delete", "no")
-        error = circuit.simulate_and_compute_error(
-            TB, EXACT_OUTPUT, TEMP_OUTPUT, Metric.MEAN_RELATIVE_ERROR_DISTANCE
-        )
+        if resynthesis:
+            resynth_circuit = copy.copy(circuit)
+            resynth_circuit.resynth()
+            error = resynth_circuit.simulate_and_compute_error(
+                TB, EXACT_OUTPUT, TEMP_OUTPUT, Metric.MEAN_RELATIVE_ERROR_DISTANCE
+            )
+        else:
+            error = circuit.simulate_and_compute_error(
+                TB, EXACT_OUTPUT, TEMP_OUTPUT, Metric.MEAN_RELATIVE_ERROR_DISTANCE
+            )
         print(f"New error: {error}")
         if error < error_threshold:
             print("Error back to being under threshold, backtracking finished")
-            return
+            if resynthesis:
+                return resynth_circuit
+            else:
+                return circuit
 
     print("Reverted all prunes.")
-    return
+    if resynthesis:
+        return resynth_circuit
+    else:
+        return circuit

From e0d3d1d8f51146ad86172816ded0b88ecb9ddbc4 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Wed, 4 Jun 2025 18:13:30 -0600
Subject: [PATCH 54/65] Fix bug in inconst/outconst runner method

---
 runner.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/runner.py b/runner.py
index 0b0aff3..3432905 100644
--- a/runner.py
+++ b/runner.py
@@ -275,9 +275,11 @@ def _run_constant_inputs_outputs(
             ]
 
             max_nodes_to_append = config.prunes_per_iteration - len(nodes_to_delete)
-            nodes_to_delete.extend(deletable_nodes_filtered[:max_nodes_to_append])
+            for node in deletable_nodes_filtered[:max_nodes_to_append]:
+                nodes_to_delete.append(node)
+                deletable_nodes_filtered.remove(node)
 
-            if len(nodes_to_delete) == len(deletable_nodes):
+            if len(deletable_nodes_filtered) == 0:
                 # If all deletable nodes are in nodes_to_delete, we can increase
                 # max_const_bit and go again or exit if all the eligible
                 # variables (i.e. all inputs or all outputs) have been tried out

From 8ae13b448d8c92eef83c83d248e75da71ed69926 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Sun, 14 Sep 2025 11:48:07 -0600
Subject: [PATCH 55/65] Fix is_node_deletable function to take into account if
 the node is an output itself

---
 circuit.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/circuit.py b/circuit.py
index afd3009..25c1f01 100644
--- a/circuit.py
+++ b/circuit.py
@@ -146,10 +146,10 @@ def is_node_deletable(self, node):
         Returns true if a node can be deleted, returns false if the node should
         be assigned a constant instead.
 
-        A node can be deleted if all its children nodes will be deleted as
-        well. If a node has children nodes or connects directly to an output of
-        the circuit, then the funcction will return false and the node should
-        be replaced with a constant.
+        A node can be deleted only if all its child nodes are also being
+        deleted. If the node has children, is connected directly to a circuit
+        output, or is itself a circuit output, the function returns false and
+        the node should be replaced with a constant.
 
         Parameters
         ----------
@@ -182,7 +182,9 @@ def is_node_deletable(self, node):
         some_children_not_deleted = len(node_children_to_be_deleted) < len(node_children)
 
         node_has_outputs = connects_to_output or some_children_not_deleted
-        node_can_be_deleted = not node_has_outputs
+        node_is_output = wire in self.outputs
+
+        node_can_be_deleted = not node_has_outputs and not node_is_output
 
         return node_can_be_deleted
 

From 5e2f1b35da7f55bd65b3583f9414f4c6cee3d189 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Sun, 14 Sep 2025 11:48:50 -0600
Subject: [PATCH 56/65] Improve shuffle bag comment

---
 circuit.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/circuit.py b/circuit.py
index 25c1f01..2ff22cc 100644
--- a/circuit.py
+++ b/circuit.py
@@ -687,6 +687,11 @@ def generate_dataset(self, filename, samples, distribution='uniform', **kwargs):
         if distribution == "shuffle_bag":
             # Shuffle bag needs to generate all the inputs together to ensure
             # avoiding repetition of the circuit's inputs as a whole.
+            #
+            # This means that if the circuit has 2 inputs of 4 bits, we don't
+            # want to generate all possible 4 bit combinations for each input.
+            # We want to generate all possible 8 bit combinations and then split
+            # those into 2 4 bit inputs.
             total_bits = sum(inputs_info.values())
             inputs = get_random(total_bits, distribution, samples, **kwargs)
             for input in inputs:

From a76ffd5e08c210c4ab0f64ef65e6038e2f598845 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Sun, 14 Sep 2025 11:49:05 -0600
Subject: [PATCH 57/65] Delete unused import

---
 circuit.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/circuit.py b/circuit.py
index 2ff22cc..c206649 100644
--- a/circuit.py
+++ b/circuit.py
@@ -4,7 +4,6 @@
 
 from graphviz import Digraph
 from os import path, remove, system, rename
-from random import randint
 from re import findall
 import xml.etree.ElementTree as ET
 

From e0b774ecaebda3c2e2b8195445ab76fbd90e3a91 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Sun, 14 Sep 2025 11:49:37 -0600
Subject: [PATCH 58/65] Improve error message if outputs don't match

---
 circuiterror.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/circuiterror.py b/circuiterror.py
index b8cb5b5..5de4e36 100644
--- a/circuiterror.py
+++ b/circuiterror.py
@@ -51,7 +51,12 @@ def compute_error(metric, original, approximate) -> float:
     original_len = len(original_output)
     approx_len = len(approximate_output)
 
-    assert original_len == approx_len, f"The output of the original and the approximate simulations doesn't match: {original_len}!={approx_len}. Make sure both outputs are being generated correctly."
+    assert original_len == approx_len, f"""
+The output of the original and the approximate simulations doesn't match: {original_len}!={approx_len}.
+Make sure both outputs are being generated correctly.
+Original output: {original}
+Approximate output: {approximate}
+"""
 
     # compute the error distance ED := |a - a'|
     error_distance = [abs(original_output[x] - approximate_output[x])

From 43c1672aaea15cb142719785519ca63be5e38d3b Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Sun, 14 Sep 2025 11:50:20 -0600
Subject: [PATCH 59/65] Fix hamming distance bitwise xor to use object dtype so
 it works regardless of input size

---
 circuiterror.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/circuiterror.py b/circuiterror.py
index 5de4e36..6154498 100644
--- a/circuiterror.py
+++ b/circuiterror.py
@@ -71,7 +71,7 @@ def compute_error(metric, original, approximate) -> float:
 
     # Mean Hamming Distance see: https://stackoverflow.com/questions/40875282/fastest-way-to-get-hamming-distance-for-integer-array
     if (metric == "hd"):
-        hamming_distance=np.bitwise_xor(original_output,approximate_output)
+        hamming_distance=np.bitwise_xor(original_output,approximate_output, dtype=object)
         hamming_distance=[f'{hd:b}'.count('1') for hd in hamming_distance]
         return round(float(np.mean(hamming_distance)),3)
 

From 690e9c76c607ea6a5a8dcdd8f1059d5d5adf91ed Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Sun, 14 Sep 2025 11:52:26 -0600
Subject: [PATCH 60/65] Add -q flag to yosys commands to make output cleaner

---
 synthesis.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/synthesis.py b/synthesis.py
index 8491d59..1815e9f 100644
--- a/synthesis.py
+++ b/synthesis.py
@@ -44,7 +44,7 @@ def synthesis (rtl, tech, topmodule):
 
     # - - - - - - - - - - - - - - - Execute yosys - - - - - - - - - - - - - -
 
-    os.system ('yosys synth.ys;')
+    os.system ('yosys -q synth.ys;')
 
     # - - - - - - - - - - - - - Delete temporal Files - - - - - - - - - - - -
 
@@ -90,7 +90,7 @@ def resynthesis(netlist, tech, topmodule):
 
     # - - - - - - - - - - - - - - - Execute yosys - - - - - - - - - - - - - -
 
-    os.system ('yosys resynth.ys;')
+    os.system ('yosys -q resynth.ys;')
 
     # - - - - - - - - - - - - - Delete temporal Files - - - - - - - - - - - -
 
@@ -134,7 +134,7 @@ def ys_get_area(netlist, tech, topmodule):
 
     # - - - - - - - - - - - - - - - Execute yosys - - - - - - - - - - - - - -
 
-    os.system (f'yosys stat.ys -l \"{yosys_log_path}\"')
+    os.system (f'yosys -q stat.ys -l \"{yosys_log_path}\"')
 
     # - - - - - - - - - - - - - - - Parse Area - - - - - - - - - - - - - - -
 

From ba93a64d8580b44e78b50998bdda97646cb63ed3 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Sun, 14 Sep 2025 11:55:23 -0600
Subject: [PATCH 61/65] Replace usage of List for list

---
 ml_algorithms/decision_tree.py | 29 ++++++++++++++---------------
 netlist.py                     |  8 ++++----
 utils.py                       |  5 ++---
 3 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/ml_algorithms/decision_tree.py b/ml_algorithms/decision_tree.py
index c00de16..4a4395f 100644
--- a/ml_algorithms/decision_tree.py
+++ b/ml_algorithms/decision_tree.py
@@ -1,5 +1,4 @@
 from collections import OrderedDict
-from typing import List
 import numpy as np
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.tree._tree import Tree
@@ -42,19 +41,19 @@ class DecisionTreeCircuit:
         Useful parameters include but are not limited to: max_depth,
     """
 
-    clf: DecisionTreeClassifier | List[DecisionTreeClassifier]
+    clf: DecisionTreeClassifier | list[DecisionTreeClassifier]
     one_tree_per_output: bool
-    inputs: List[CircuitVariable]
-    outputs: List[CircuitVariable]
+    inputs: list[CircuitVariable]
+    outputs: list[CircuitVariable]
     _trained: bool
 
-    circuit_inputs: List[str]
-    circuit_outputs: List[str]
+    circuit_inputs: list[str]
+    circuit_outputs: list[str]
 
     def __init__(
         self,
-        circuit_inputs: List[str],
-        circuit_outputs: List[str],
+        circuit_inputs: list[str],
+        circuit_outputs: list[str],
         one_tree_per_output=False,
         **kwargs,
     ):
@@ -72,7 +71,7 @@ def __init__(
         else:
             self.clf = DecisionTreeClassifier(**kwargs)
 
-    def train(self, X: List[List[int]], y: List[List[int]]):
+    def train(self, X: list[list[int]], y: list[list[int]]):
         """Train the decision tree classifier(s) with the training set (X, y).
 
         Parameters
@@ -153,7 +152,7 @@ def to_verilog_file(self, topmodule: str, output_file: str):
             f.write("endmodule\n")
 
 
-def _to_binary(x: List[List[int]], bit_widths: List[int]):
+def _to_binary(x: list[list[int]], bit_widths: list[int]):
     """Convert a list of lists of integers to a binary representation.
 
     This function takes a list input rows `x` and a list of bit widths
@@ -169,9 +168,9 @@ def _to_binary(x: List[List[int]], bit_widths: List[int]):
 
     Parameters
     ----------
-    x : List[List[int]]
+    x : list[list[int]]
         A list of lists of integers, where each inner list represents a row of input data.
-    bit_widths : List[int]
+    bit_widths : list[int]
         A list of integers, where each value represents the number of bits to use for the
         corresponding column in the input data.
 
@@ -215,7 +214,7 @@ def _to_binary(x: List[List[int]], bit_widths: List[int]):
     return result
 
 
-def _parse_circuit_variables(variable_list: List[str]):
+def _parse_circuit_variables(variable_list: list[str]):
     """Parse a list of circuit variable names and bit widths.
 
     TODO: This function should be put in a common module to be used by future ML
@@ -223,13 +222,13 @@ def _parse_circuit_variables(variable_list: List[str]):
 
     Parameters
     ----------
-    input_list : List[str]
+    input_list : list[str]
         A list of strings representing circuit variables, where each variable can be
         either a single-bit variable (e.g., 'cin') or a multi-bit variable (e.g., 'in1[3]').
 
     Returns
     -------
-    List[CircuitVariable]
+    list[CircuitVariable]
         A list of `CircuitVariable` objects, where each object represents a
         circuit variable with a name and bit width.
     """
diff --git a/netlist.py b/netlist.py
index 534516a..87ff499 100644
--- a/netlist.py
+++ b/netlist.py
@@ -269,7 +269,7 @@ def expand_concat(expr):
 
     Returns
     -------
-    List[string]
+    list[string]
         A list of strings like ["a[3]", "a[2]", "a[1]", "a[0]", "b[1]", "c"].
 
     Examples
@@ -310,7 +310,7 @@ def expand_range(expr):
 
     Returns
     -------
-    List[string]
+    list[string]
         A list of strings or bits.
 
     Examples
@@ -361,7 +361,7 @@ def expand_constant(expr):
 
     Returns
     -------
-    List[int]
+    list[int]
         A list of bits like [1, 1, 0].
 
     Examples
@@ -419,7 +419,7 @@ def parse_assigns(content):
 
     Returns
     -------
-    List[Tuple[string, string]]
+    list[Tuple[string, string]]
         A list of (lhs, rhs) assignment pairs, one for each individual bit.
 
     Examples
diff --git a/utils.py b/utils.py
index 37bdcd1..93f6c41 100644
--- a/utils.py
+++ b/utils.py
@@ -2,7 +2,6 @@
 from datetime import datetime
 import random
 import string
-from typing import List
 import numpy as np
 import math
 from random import randrange, gauss, triangular
@@ -103,7 +102,7 @@ def get_random(bits: int, distribution='uniform', samples=1, **kwargs):
 
     return data
 
-def read_dataset(filename: str, base: int, max_lines: None | int =None) -> List[List[int]]:
+def read_dataset(filename: str, base: int, max_lines: None | int =None) -> list[list[int]]:
     """
     Reads a dataset or circuit output file like those generated by the
     `Circuit.generate_dataset` file or `Circuit.exact_output`.
@@ -123,7 +122,7 @@ def read_dataset(filename: str, base: int, max_lines: None | int =None) -> List[
 
     Returns
     ----------
-    dataset : List[List[int]]
+    dataset : list[list[int]]
         The data read from the file. The returned list is ordered by rows first,
         then columns. For example indexing dataset[2][5] requests the 5th
         element of the 2nd row.

From 99c550ea378198ef868f610066cfa65497ccfcff Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Sun, 14 Sep 2025 12:01:08 -0600
Subject: [PATCH 62/65] Improve CSV TODO in configuration.py

---
 configuration.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/configuration.py b/configuration.py
index 1d77c67..566d488 100644
--- a/configuration.py
+++ b/configuration.py
@@ -170,13 +170,17 @@ class ApproxSynthesisConfig:
         just be left blank if not provided.
     """
 
-    # TODO: the configuration options included in the csv: resynthesis, error,
-    # max_depth and one_tree_per_output; were chosen arbitrarily and are not
-    # necessarily more interesting than other options not included. Perhaps the
-    # configuration options included in the CSV should also be configurable, or
-    # we should include any options that are not None, or we should always
-    # include every single possible option and metric in the csv, even those
-    # not specified.
+    # TODO: The csv option requires further thought/design.
+    #
+    # Currently, the configuration options included in the csv (resynthesis,
+    # error, max_depth and one_tree_per_output) were chosen arbitrarily given
+    # what was needed at the time when initially adding csv output; and are
+    # not necessarily more interesting in the general case than other options
+    # not included.
+    # Perhaps the configuration options included in the CSV should also be
+    # configurable, or we should include any options that are not None, or we
+    # should always include every single possible option and metric in the csv,
+    # even those not specified.
 
     method: AlsMethod
     circuit: Circuit

From 5265824a16f9c005aa013e17df5149399c1132d1 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Sun, 14 Sep 2025 12:34:47 -0600
Subject: [PATCH 63/65] Update README and remove unused config and help files
 in favour of the added CLI and configuration object

---
 README.md        | 94 ++++++++++++++++++++++++++++++++++--------------
 __main__.py      |  2 +-
 barcas.py        | 80 -----------------------------------------
 poisonoak.config |  1 -
 poisonoak.help   |  5 ---
 5 files changed, 68 insertions(+), 114 deletions(-)
 delete mode 100644 barcas.py
 delete mode 100644 poisonoak.config
 delete mode 100644 poisonoak.help

diff --git a/README.md b/README.md
index 47fe675..54565da 100644
--- a/README.md
+++ b/README.md
@@ -19,9 +19,11 @@ Roger Morales-Monge, student, Tecnológico de Costa Rica
    3. [Cloning benchmarks](#cloning-benchmarks)
 4. [Executing Demo](#executing-demo)
 5. [Using AxLS](#using-axls)
-   1. [Parsing a netlist](#parsing-a-netlist)
-   2. [Deleting a node](#deleting-a-node)
-   3. [Simulation and Error Estimation](#simulation-and-error-estimation)
+   1. [CLI and simple programmatic usage](#cli-and-simple-programmatic-usage)
+   2. [Library usage](#library-usage)
+       1. [Parsing a netlist](#parsing-a-netlist)
+       2. [Deleting a node](#deleting-a-node)
+       3. [Simulation and Error Estimation](#simulation-and-error-estimation)
 6. [ALS Algorithms](#als-algorithms)
    1. [Pruning Algorithms](#pruning-algorithms)
       - [InOuts](#inouts)
@@ -179,7 +181,44 @@ Mean Error Distance of approximate circuit with node _101_ deleted: 3.979
 
 ## Using AxLS
 
-### Parsing a netlist
+AxLS can be used in multiple ways, either through a CLI, it also accepts configuration parameters programmatically in order to execute the ALS methods in a simplified way, or the ALS methods can be used directly—library style.
+
+### CLI and simple programmatic usage
+
+To print the help, run:
+
+```sh
+python . -h
+```
+
+The CLI has 2 subcommands:
+
+- `run` for executing an ALS method.
+- `generate` in order to generate datasets to be used by the ALS execution for simulation or (in the case of ML methods) training.
+
+Here's an example usage generating a dataset and executing an ALS method:
+
+```sh
+# Requires previously having cloned the ALS-benchmark-circuits repo (see cloning benchmarks section)
+CIRCUIT=ALS-benchmark-circuits/KS_16b/KS_16b.v
+# We generate a simulation dataset of 10k possible input/output pairs for the KS_16b circuit.
+# Uses a uniform distribution of inputs by default.
+python . generate $CIRCUIT test_dataset 10000
+# Run the inconst method, calculating the MRED, circuit area and execution time metrics,
+# accepting at max 20% error introduction, separating 10% of the dataset for validation,
+# and pruning 10 nodes per iteration.
+python . run inconst $CIRCUIT test_dataset mred time area --error 0.2 --validation 0.1 --prunes-per-iteration 10
+```
+
+The tool can also be used programmatically, with an interface very similar to the CLI, by calling the `run`
+method from `runner.py` directly and passing in an `ApproxSynthesisConfig` configuration object.
+
+### Library usage
+
+This section introduces some basic concepts to manipulate a netlist directly
+which is a key part of employing the different ALS methods directly.
+
+#### Parsing a netlist
 
 1. First, import the `Circuit` class:
 
@@ -250,7 +289,7 @@ Using this node you can implement your own pruning algorithms. Because ElementTr
 
 
 
-### Deleting a node
+#### Deleting a node
 
 1. The first example method we provide to delete nodes is quite simple, just delete a node based on its name. You can do it in two different ways:
 
@@ -269,7 +308,7 @@ our_circuit.delete("_101_")
 
 When you set the attribute `delete` of a node to `yes`, it means that this node will be deleted the next time our circuit is saved in the filesystem. **The node will remain in the xml tree!** (just in case we need to revert a deletion).
 
-### Simulation and Error Estimation
+#### Simulation and Error Estimation
 
 Simulation stage and error estimation are executed inside one method called `simulate_and_compute_error`. But first, in order to execute a simulation and calculate its error you need to provide:
 
@@ -598,25 +637,26 @@ introducing around ~23% error.
 
 Files and Folders description:
 
-| Name                | Description                                                  | Used   |
-| ------------------- | ------------------------------------------------------------ | ------ |
-| prunning_algorithms | Folder containing pruning techniques implementations.        |        |
-| `inouts.py`         | Contains the implementation of `GetInputs` and `GetOutputs` example pruning methods. |        |
-| `probprun.py`       | Contains the implementation of a pseudo Probabilistic Pruning method. `GetOneNode` is a python generator. It will retrieve one node to delete each time it is called. |        |
-| templates           | Folder containing some libraries and scripts used for synthesis. |        |
-| `NanGate15nm.lib`   |                                                              |        |
-| `NanGate15nm.v`     |                                                              |        |
-| `synth.ys`          | Script to synthesize a circuit using yosys.                 |        |
-| `__main__.py`       | It executes the tool using the arguments from the command line. **Still in progress**. | **No** |
-| `barcas.py`         | Is the Pruning Implementation using the InOuts techniques.   | **NO** |
-| `circuit.py`        | Object that represents a circuit as a XML tree. Receives a rtl and a library in order to build the circuit and be able to simulate it. |        |
-| `circuiterror.py`   | Compares two outputs and computes different error metrics.   |        |
-| `demo.py`           | This file is a complete example of how the library should be used. |        |
-| `netlist.py`        | This class parses, extracts and represents the circuit from rtl into an object understandable by python. |        |
-| `poisonoak.config`  | This is going to be used along with `__main__.py` in order to execute poisonoak as an app, and not as a library. | **No** |
-| `poisonoak.help`    | Contains the menu and tool description of the poison oak app. | **No** |
-| `synthesis.py`      | Executes the synthesis script (in our case yosys) and clean the intermediate files generated. At the end returns the path of the netlist. |        |
-| `technology.py`     | This class parses, extracts and represents the technology library file into an object understandable by python. |        |
-| `test.py`           | This class implements some unit tests for the poison oak library. **Not implemented yet**. | **No** |
-| `utils.py`          | Some functions not related with any other class but useful.  |        |
+| Name                | Description                                                  |
+| ------------------- | ------------------------------------------------------------ |
+| `prunning_algorithms/` | Folder containing pruning techniques implementations.        |
+| `prunning_algorithms/inouts.py`         | Contains the implementation of `GetInputs` and `GetOutputs` example pruning methods. |
+| `prunning_algorithms/probprun.py`       | Contains the implementation of a pseudo Probabilistic Pruning method. `GetOneNode` is a python generator. It will retrieve one node to delete each time it is called. |
+| `ml_algorithms/`              | Folder containing ML techniques implementations.        |
+| `ml_algorithms/decision_tree.py` | Contains the implementation of the Decision Tree technique through the `DecisionTreeCircuit` class. |
+| `templates/`           | Folder containing some libraries and scripts used for synthesis. |
+| `templates/NanGate15nm.lib`   | Technology file from Nangate. |
+| `templates/NanGate15nm.v`     |                               |
+| `synth.ys`          | Script to synthesize a circuit using yosys.                 |
+| `__main__.py`       | It executes the tool using the arguments from the command line. |
+| `configuration.py`  | Contains a configuration class for executing an ALS flow. Used by the CLI, but can be used by other scripts to do executions programmatically without delving into the library's details. |
+| `runner.py`  | Contains a `run` method which accepts a configuration class in order to execute one of the ALS methods. |
+| `circuit.py`        | Object that represents a circuit as a XML tree. Receives a rtl and a library in order to build the circuit and be able to simulate it. |
+| `circuiterror.py`   | Compares two outputs and computes different error metrics.   |
+| `demo.py`           | This file is a complete example of how the library should be used. |
+| `netlist.py`        | This class parses, extracts and represents the circuit from rtl into an object understandable by python. |
+| `synthesis.py`      | Executes the synthesis script (in our case yosys) and cleans the intermediate files generated. At the end returns the path of the netlist. |
+| `technology.py`     | This class parses, extracts and represents the technology library file into an object understandable by python. |
+| `utils.py`          | Some functions not related with any other class but useful.  |
+| `test.py`           | Currently unused file, meant to be used for unit tests. Out of date. |
 
diff --git a/__main__.py b/__main__.py
index b5e2edb..5356666 100644
--- a/__main__.py
+++ b/__main__.py
@@ -202,7 +202,7 @@ def generate_arguments(generate_parser):
     generate_parser.add_argument(
         "--distribution",
         default="uniform",
-        choices=["gaussian", "uniform", "triangular", "shuffle_bag"],
+        choices=["uniform", "gaussian", "triangular", "shuffle_bag"],
         help=".",
     )
     generate_parser.add_argument(
diff --git a/barcas.py b/barcas.py
deleted file mode 100644
index 5f7da2d..0000000
--- a/barcas.py
+++ /dev/null
@@ -1,80 +0,0 @@
-from copy import deepcopy
-from circuit import Circuit
-from pruning_algorithms.inouts import GetInputs, GetOutputs
-
-BASE    =   "circuits/ripple.carry.4b/"
-TOP     =   "RCA_3_0"
-MET     =   "wce"
-
-RTL     =   f"{BASE}{TOP}.v"
-TB      =   f"{BASE}{TOP}_tb.v"
-SAIF    =   f"{BASE}{TOP}.saif"
-ORIG    =   f"{BASE}output0.txt"
-APPR    =   f"{BASE}output.txt"
-
-def log (msg):
-    with open(f"{BASE}log.txt", "a+") as f:
-        f.write(msg)
-    print(msg)
-
-def barcas(circuit, max_error):
-
-    log(f"Pruning circuit for Max Error of: {max_error}\n")
-
-    actual_error = 0
-
-    last_stable_circuit = deepcopy(circuit)
-    modified_circuit = deepcopy(circuit)
-
-    for bit in range (0, 4):
-
-        for type in ["i","o"]:
-
-            if type == "i":
-                inputs = [f"in1[{bit}]",f"in2[{bit}]"]
-                nodes = GetInputs(modified_circuit.netl_root, inputs)
-            else:
-                outputs = [f"out[{bit}]"]
-                nodes = GetOutputs(modified_circuit.netl_root, outputs)
-
-            #print(nodes)
-
-            for node in nodes:
-                modified_circuit.delete(node.attrib["var"])
-
-                obtained_error = modified_circuit.simulate(TB, MET, ORIG, APPR)
-
-                nvar = node.attrib["var"];
-
-                msg = f"Node Deleted: {nvar}, error({MET}): {obtained_error}\n"
-
-                log(msg);
-
-                if obtained_error <= max_error:
-                    last_stable_circuit.delete(node.attrib["var"])
-                    actual_error = obtained_error
-                else:
-                    modified_circuit.undodelete(node.attrib["var"])
-
-
-        if (actual_error == max_error):
-            break
-
-    final_error = last_stable_circuit.simulate(TB, MET, ORIG, APPR, clean=False)
-
-    last_stable_circuit.show(show_deletes=True)
-    input("Press enter...")
-
-    msg = f"[FINAL] Expected: {max_error}, Obtained: {final_error}\n"
-    log(msg)
-
-our_circuit = Circuit(RTL, "NanGate15nm")
-
-for error in [8]: #range (10, 101, 10):
-
-    our_circuit.exact_output(TB)
-    barcas(our_circuit, error)
-    '''
-    x = threading.Thread(target=barcas, args=(our_circuit, error,))
-    x.start()
-    '''
diff --git a/poisonoak.config b/poisonoak.config
deleted file mode 100644
index ddab841..0000000
--- a/poisonoak.config
+++ /dev/null
@@ -1 +0,0 @@
-RTL=/home/sudohumberto/circuits/brent.kung.16b/UBBKA_15_0_15_0.v
diff --git a/poisonoak.help b/poisonoak.help
deleted file mode 100644
index 2fc1706..0000000
--- a/poisonoak.help
+++ /dev/null
@@ -1,5 +0,0 @@
-In order to use the program as a tool, you need to setup a config file.
-
-1. Create a file named 'poisonoak.config' inside poisonoak folder:
-
-2. Content of the config files

From a2d107b37586b75dcb1bcbdaec4f31c12a07f90d Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Sun, 14 Sep 2025 12:36:33 -0600
Subject: [PATCH 64/65] Improve section name

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 54565da..2335ab2 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ Roger Morales-Monge, student, Tecnológico de Costa Rica
    3. [Cloning benchmarks](#cloning-benchmarks)
 4. [Executing Demo](#executing-demo)
 5. [Using AxLS](#using-axls)
-   1. [CLI and simple programmatic usage](#cli-and-simple-programmatic-usage)
+   1. [CLI and simplified API usage](#cli-and-simplified-api-usage)
    2. [Library usage](#library-usage)
        1. [Parsing a netlist](#parsing-a-netlist)
        2. [Deleting a node](#deleting-a-node)
@@ -183,7 +183,7 @@ Mean Error Distance of approximate circuit with node _101_ deleted: 3.979
 
 AxLS can be used in multiple ways, either through a CLI, it also accepts configuration parameters programmatically in order to execute the ALS methods in a simplified way, or the ALS methods can be used directly—library style.
 
-### CLI and simple programmatic usage
+### CLI and simplified API usage
 
 To print the help, run:
 

From 1be77251d01297267124d57956411e361af18730 Mon Sep 17 00:00:00 2001
From: Ignacio Vargas <ignaevc@gmail.com>
Date: Sun, 14 Sep 2025 12:37:05 -0600
Subject: [PATCH 65/65] Embolden TODO

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 2335ab2..770c7c7 100644
--- a/README.md
+++ b/README.md
@@ -375,7 +375,7 @@ This framework currently provides 2 kinds of ALS algorithms:
 These algorithms suggest which nodes to delete based on circuit data or
 heuristics.
 
-TODO: Missing documentation on `ccarving` and `glpsignificance`
+**TODO: Missing documentation on `ccarving` and `glpsignificance`**
 
 #### InOuts