Merge pull request #43 from Axiomatic-AI/axtract_helper

tymekaxai · web-flow · commit 0cbe740e5b3c · 2025-03-20T19:42:30.000+01:00
now working :) — add AxtractHelper to the client and switch the axtract modules to EquationProcessingResponse
diff --git a/src/axiomatic/axtract/axtract_report.py b/src/axiomatic/axtract/axtract_report.py
@@ -1,5 +1,5 @@
 from .relation_graph import generate_relation_graph
-from .. import EquationExtractionResponse
+from .. import EquationProcessingResponse
 import os
 import re
 
@@ -190,7 +190,7 @@
 """
 
 
-def create_report(report_data: EquationExtractionResponse, report_path: str = "./report.html"):
+def create_report(report_data: EquationProcessingResponse, report_path: str = "./report.html"):
     """
     Creates an HTML report for the extracted equations.
     """
diff --git a/src/axiomatic/axtract/interactive_table.py b/src/axiomatic/axtract/interactive_table.py
@@ -2,7 +2,8 @@
 from IPython.display import display  # type: ignore
 import json  # type: ignore
 import os  # type: ignore
-from .. import EquationExtractionResponse, VariableRequirement
+from .. import EquationProcessingResponse, VariableRequirement
+from typing import Dict, Any
 
 
 def _find_symbol(name, variable_dict):
@@ -33,15 +34,15 @@ def _requirements_from_table(results, variable_dict):
     return requirements
 
 
-def interactive_table(loaded_equations, file_path="./custom_presets.json"):
+def interactive_table(loaded_equations: EquationProcessingResponse, file_path: str = "./custom_presets.json"):
     """
     Creates an interactive table for IMAGING_TELESCOPE,
     PAYLOAD, and user-defined custom templates.
     Adds or deletes rows, and can save custom templates persistently in JSON.
 
     Parameters
     ----------
-    loaded_equations : EquationExtractionResponse
+    loaded_equations : EquationProcessingResponse
         The extracted equations containing variable information
     file_path : str, optional
         JSON file path where we load and save user-created custom templates.
@@ -55,49 +56,35 @@ def interactive_table(loaded_equations, file_path="./custom_presets.json"):
     # ---------------------------------------------------------------
     # 1) Define built-in templates and units directly inside the function
     # ---------------------------------------------------------------
-    IMAGING_TELESCOPE_template = {
-        "Resolution (panchromatic)": 0,
-        "Ground sampling distance (panchromatic)": 0,
-        "Resolution (multispectral)": 0,
-        "Ground sampling distance (multispectral)": 0,
-        "Altitude": 0,
-        "Half field of view": 0,
-        "Mirror aperture": 0,
-        "F-number": 0,
-        "Focal length": 0,
-        "Pixel size (panchromatic)": 0,
-        "Pixel size (multispectral)": 0,
-        "Swath width": 0,
-    }
 
     IMAGING_TELESCOPE = {
-        "Resolution (panchromatic)": 1.23529,
-        "Ground sampling distance (panchromatic)": 0.61765,
-        "Resolution (multispectral)": 1.81176,
-        "Ground sampling distance (multispectral)": 0.90588,
+        "Resolved Ground Detail, Panchromatic": 1.23529,
+        "Ground Sample Distance, Panchromatic": 0.61765,
+        "Resolved Ground Detail, Multispectral": 1.81176,
+        "Ground Sample Distance, Multispectral": 0.90588,
         "Altitude": 420000,
-        "Half field of view": 0.017104227,
-        "Mirror aperture": 0.85,
-        "F-number": 6.0,
+        "Horizontal Field of View": 0.017104227,
+        "Aperture diameter": 0.85,
+        "f-number": 6.0,
         "Focal length": 5.1,
-        "Pixel size (panchromatic)": 7.5e-6,
-        "Pixel size (multispectral)": 11e-6,
-        "Swath width": 14368.95,
+        "Pixel pitch": 7.5e-6,
+        "Pixel pitch of the multispectral sensor": 11e-6,
+        "Swath Width": 14368.95,
     }
 
     IMAGING_TELESCOPE_UNITS = {
-        "Resolution (panchromatic)": "m",
-        "Ground sampling distance (panchromatic)": "m",
-        "Resolution (multispectral)": "m",
-        "Ground sampling distance (multispectral)": "m",
+        "Resolved Ground Detail, Panchromatic": "m",
+        "Ground Sample Distance, Panchromatic": "m",
+        "Resolved Ground Detail, Multispectral": "m",
+        "Ground Sample Distance, Multispectral": "m",
         "Altitude": "m",
-        "Half field of view": "rad",
-        "Mirror aperture": "m",
-        "F-number": "dimensionless",
+        "Horizontal Field of View": "rad",
+        "Aperture diameter": "m",
+        "f-number": "dimensionless",
         "Focal length": "m",
-        "Pixel size (panchromatic)": "m",
-        "Pixel size (multispectral)": "m",
-        "Swath width": "m",
+        "Pixel pitch": "m",
+        "Pixel pitch of the multispectral sensor": "m",
+        "Swath Width": "m",
     }
 
     PAYLOAD_1 = {
@@ -120,7 +107,6 @@ def interactive_table(loaded_equations, file_path="./custom_presets.json"):
     preset_options_dict = {
         "Select a template": [],
         "IMAGING TELESCOPE": list(IMAGING_TELESCOPE.keys()),
-        "IMAGING TELESCOPE template": list(IMAGING_TELESCOPE_template.keys()),
         "PAYLOAD": list(PAYLOAD_1.keys()),
     }
 
@@ -170,7 +156,7 @@ def save_custom_presets(custom_data, file_path):
     name_label_width = ["150px"]
 
     # Dictionary to keep track of row widget references
-    value_widgets = {}
+    value_widgets: Dict[str, Any] = {}
 
     # ---------------------------------------------------------------
     # 6) display_table(change): Re-populate rows when user selects a template
@@ -272,11 +258,6 @@ def submit_values(_):
         result["values"] = updated_values
         requirements_result[0] = _requirements_from_table(result, variable_dict)
 
-        # Display a confirmation message
-        with message_output:
-            message_output.clear_output()
-            print("Requirements submitted successfully!")
-
         return requirements_result[0]
 
     # ---------------------------------------------------------------
@@ -382,14 +363,14 @@ def save_requirements(_):
     return requirements_result
 
 
-def _create_variable_dict(equation_response: EquationExtractionResponse) -> dict:
+def _create_variable_dict(equation_response: EquationProcessingResponse) -> dict:
     """
-    Creates a variable dictionary from an EquationExtractionResponse object
+    Creates a variable dictionary from an EquationProcessingResponse object
     for use with the interactive_table function.
 
     Parameters
     ----------
-    equation_response : EquationExtractionResponse
+    equation_response : EquationProcessingResponse
         The equation extraction response containing equations and their symbols
 
     Returns
@@ -406,9 +387,13 @@ def _create_variable_dict(equation_response: EquationExtractionResponse) -> dict
 
     # Iterate through all equations and their symbols
     for equation in equation_response.equations:
-        for symbol in equation.latex_symbols:
+        wolfram_symbols = equation.wolfram_symbols
+        latex_symbols = [equation.latex_symbols[i].key for i in range(len(equation.latex_symbols))]
+        names = [equation.latex_symbols[i].value for i in range(len(equation.latex_symbols))]
+
+        for symbol, name in zip(wolfram_symbols, names):
             # Only add if not already present (avoid duplicates)
-            if symbol.key not in variable_dict:
-                variable_dict[symbol.key] = {"name": symbol.value}
+            if symbol not in variable_dict:
+                variable_dict[symbol] = {"name": name}
 
     return variable_dict
diff --git a/src/axiomatic/axtract/relation_graph.py b/src/axiomatic/axtract/relation_graph.py
@@ -1,5 +1,5 @@
 from typing import List
-from .. import EquationExtraction
+from ..types.response_equation import ResponseEquation
 from pyvis.network import Network  # type: ignore
 
 
@@ -20,7 +20,7 @@ def normalize_latex_symbol(symbol: str) -> str:
     return symbol
 
 
-def generate_relation_graph(equations: List[EquationExtraction]) -> str:
+def generate_relation_graph(equations: List[ResponseEquation]) -> str:
     """
     Generates HTML code for a bipartite graph visualization.
     Green nodes represent equations, red nodes represent variables.
@@ -41,7 +41,7 @@ def generate_relation_graph(equations: List[EquationExtraction]) -> str:
     # Add equation nodes (green) and variable nodes (red)
     for eq in equations:
         # Add equation node with unique identifier
-        eq_name = f"Eq: {eq.name} ({eq.id})"  # Add ID to make each node unique
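+        eq_name = f"Eq: {eq.name}"  # Node key is the equation name only (eq.id is no longer appended); NOTE(review): same-named equations presumably share one node — confirm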
         net.add_node(
             eq_name,
             label=eq.name,
diff --git a/src/axiomatic/axtract/validation_results.py b/src/axiomatic/axtract/validation_results.py
@@ -6,7 +6,8 @@
 
 def display_full_results(validation_results, requirements=None, show_hypergraph=True):
     """Display equation validation results optimized for dark theme notebooks."""
-    validations = validation_results.get("validations", {})
+    # Accept either a plain validations dict (used as-is) or a response object exposing `.validations`
+    validations = validation_results if isinstance(validation_results, dict) else validation_results.validations
 
     matching = []
     non_matching = []
@@ -15,13 +16,16 @@ def display_full_results(validation_results, requirements=None, show_hypergraph=
         equation_data = {
             "name": eq_name,
             "latex": value.get("original_format", ""),
-            "lhs": value.get("lhs_value"),
-            "rhs": value.get("rhs_value"),
-            "diff": abs(value.get("lhs_value", 0) - value.get("rhs_value", 0)),
-            "percent_diff": abs(value.get("lhs_value", 0) - value.get("rhs_value", 0))
-            / max(abs(value.get("rhs_value", 0)), 1e-10)
+            "lhs": float(value.get("lhs_value", 0)),
+            "rhs": float(value.get("rhs_value", 0)),
+            "diff": abs(float(value.get("lhs_value", 0)) - float(value.get("rhs_value", 0))),
+            "percent_diff": abs(float(value.get("lhs_value", 0)) - float(value.get("rhs_value", 0)))
+            / max(abs(float(value.get("rhs_value", 0))), 1e-10)
             * 100,
-            "used_values": value.get("used_values", {}),
+            "used_values": {
+                k: float(v.split("*^")[0]) * (10 ** float(v.split("*^")[1])) if "*^" in v else float(v)
+                for k, v in value.get("used_values", {}).items()
+            },
         }
         if value.get("is_valid"):
             matching.append(equation_data)
diff --git a/src/axiomatic/client.py b/src/axiomatic/client.py
@@ -4,11 +4,14 @@
 import requests # type: ignore
 import os
 import time
-import json
-from typing import Dict
+from typing import Dict, Optional, Sequence
 
 from .base_client import BaseClient, AsyncBaseClient
-from .types.parse_response import ParseResponse
+from . import ParseResponse, EquationProcessingResponse
+from .axtract.axtract_report import create_report
+from .axtract.validation_results import display_full_results
+from .types.variable_requirement import VariableRequirement
+from .types.equation_validation_result import EquationValidationResult
 
 
 class Axiomatic(BaseClient):
@@ -112,6 +115,142 @@ def load_parsed_pdf(self, path: str) -> ParseResponse:
             inline_equations=inline_equations,
         )
 
+class AxtractHelper:
+    _ax_client: Axiomatic
+
+    def __init__(self, ax_client: Axiomatic):
+        """Initialize the AxtractHelper with an Axiomatic client.
+
+        Args:
+            ax_client (Axiomatic): The Axiomatic client instance to use for API calls
+        """
+        self._ax_client = ax_client
+
+    def create_report(self, response: EquationProcessingResponse, path: str):
+        """Create a report from equation extraction results.
+
+        Args:
+            response (EquationProcessingResponse): The processed equations and their metadata
+            path (str): File path where the HTML report should be saved (e.g. "./report.html")
+        """
+        create_report(response, path)
+
+    def analyze_equations(
+        self,
+        file_path: Optional[str] = None,
+        url_path: Optional[str] = None,
+        parsed_paper: Optional[ParseResponse] = None,
+    ) -> Optional[EquationProcessingResponse]:
+        """Analyze and extract equations from a scientific document.
+
+        This method supports three input methods:
+        1. Local PDF file path
+        2. URL to a PDF (with special handling for arXiv URLs)
+        3. Pre-parsed paper data
+
+        Args:
+            file_path (Optional[str]): Path to a local PDF file
+            url_path (Optional[str]): URL to a PDF file (supports arXiv links)
+            parsed_paper (Optional[ParseResponse]): Pre-parsed paper data
+
+        Returns:
+            Optional[EquationProcessingResponse]: Extracted equations and their metadata.
+            Returns None if no valid input is provided.
+
+        Examples:
+            # From local file
+            client.analyze_equations(file_path="path/to/paper.pdf")
+            
+            # From URL
+            client.analyze_equations(url_path="https://arxiv.org/pdf/2203.00001.pdf")
+            
+            # From parsed paper
+            client.analyze_equations(parsed_paper=parsed_data)
+        """
+        if file_path:
+            with open(file_path, "rb") as pdf_file:
+                parsed_document = self._ax_client.document.parse(file=pdf_file)
+                print("We are almost there")
+                response = self._ax_client.document.equation.process(
+                    markdown=parsed_document.markdown, 
+                    interline_equations=parsed_document.interline_equations,
+                    inline_equations=parsed_document.inline_equations
+                    )
+        
+        elif url_path:
+            if "arxiv" in url_path and "abs" in url_path:
+                url_path = url_path.replace("abs", "pdf")
+            url_file = requests.get(url_path)
+            from io import BytesIO
+            pdf_stream = BytesIO(url_file.content)
+            parsed_document = self._ax_client.document.parse(file=url_file.content)
+            print("We are almost there")
+            response = self._ax_client.document.equation.process(
+                markdown=parsed_document.markdown, 
+                interline_equations=parsed_document.interline_equations,
+                inline_equations=parsed_document.inline_equations
+                )
+        
+        elif parsed_paper:
+            response = EquationProcessingResponse.model_validate(
+                self._ax_client.document.equation.process(**parsed_paper.model_dump()).model_dump()
+            )
+        
+        else:
+            print("Please provide either a file path or a URL to analyze.")
+            return None
+        
+        return response
+
+    def validate_equations(
+        self,
+        requirements: Sequence[VariableRequirement],
+        loaded_equations: EquationProcessingResponse,
+        include_internal_model: bool = False,
+    ) -> EquationValidationResult:
+        """Validate equations against a set of variable requirements.
+
+        Args:
+            requirements: List of variable requirements to validate
+            loaded_equations: Previously processed equations to validate
+            (Note: show_hypergraph is not an argument of this method; it is an option of axtract.validation_results.display_full_results.)
+            include_internal_model: Whether to include internal model equations in validation (default: False)
+
+        Returns:
+            EquationValidationResult containing the validation results
+        """
+        # equations_dict = loaded_equations.model_dump() if hasattr(loaded_equations, 'model_dump') else loaded_equations.dict()
+        
+        api_response = self._ax_client.document.equation.validate(
+            variables=requirements, 
+            paper_equations=loaded_equations,
+            include_internal_model=include_internal_model
+            )
+        
+        return api_response
+    
+
+    def display_full_results(self, api_response: EquationValidationResult, user_choice):
+        display_full_results(api_response, user_choice)
+    
+
+    def set_numerical_requirements(self, extracted_equations: EquationProcessingResponse):
+        """Launch an interactive interface for setting numerical requirements for equations.
+
+        This method opens an interactive table interface where users can specify
+        requirements for variables found in the extracted equations.
+
+        Args:
+            extracted_equations: The equations to set requirements for
+
+        Returns:
+            The requirements set through the interactive interface
+        """
+        from .axtract.interactive_table import interactive_table
+
+        result = interactive_table(extracted_equations)
+        return result
+
 
 class ToolsHelper:
     _ax_client: Axiomatic