Make automated tools async all the way down, address review comments, and revamp the profile system to be easier for agents to use

bcallender · bcallender · commit 52be67d505f7 · 2025-09-22T09:59:33.000-07:00
diff --git a/src/fenic/api/mcp/tool_generation.py b/src/fenic/api/mcp/tool_generation.py
@@ -15,20 +15,28 @@
 import functools
 import hashlib
 import inspect
-import json
 import re
-from dataclasses import dataclass, asdict
-from typing import Callable, Dict, List, Literal, Optional, TypedDict, Union, Coroutine, Any
+from dataclasses import dataclass
+from inspect import iscoroutinefunction
+from typing import (
+    Any,
+    Callable,
+    Coroutine,
+    Dict,
+    List,
+    Literal,
+    Optional,
+    Union,
+)
 
-from fastmcp.server.context import Context
 import polars as pl
+from fastmcp.server.context import Context
 from typing_extensions import Annotated
 
 from fenic.api.dataframe.dataframe import DataFrame
 from fenic.api.functions import (
     avg,
     col,
-    count,
     stddev,
 )
 from fenic.api.functions import max as max_
@@ -47,6 +55,8 @@
     StringType,
 )
 
+PROFILE_MAX_SAMPLE_SIZE = 10_000
+
 
 @dataclass
 class DatasetSpec:
@@ -101,18 +111,28 @@ def fenic_tool(
     tool_name: str,
     tool_description: str,
     max_result_limit: Optional[int] = None,
+    client_limit_parameter: bool = True,
     default_table_format: TableFormat = "markdown",
     read_only: bool = True,
     idempotent: bool = True,
     destructive: bool = False,
     open_world: bool = False,
-) -> Callable[[Callable[..., Coroutine[Any, Any, DataFrame]]], DynamicToolDefinition]:
+) -> Callable[[
+        Union[
+            Callable[..., Coroutine[Any, Any, DataFrame]],
+            Callable[..., DataFrame]
+        ]], DynamicToolDefinition]:
     """Decorator to bind a DataFrame to a user-authored tool function.
 
+    Can be added to a synchronous or asynchronous (recommended) tool function.
+    Function based tools (dynamic tools) cannot be persisted to the catalog.
+    See the [Fenic MCP documentation](https://fenic.ai/docs/topics/fenic-mcp) for more details.
+
     Args:
         tool_name: The name of the tool.
         tool_description: The description of the tool.
-        max_result_limit: The maximum number of results to return.
+        max_result_limit: The maximum number of results to return. If omitted, no limit will be enforced.
+        client_limit_parameter: Whether to expose a `limit` parameter that lets the client cap the number of rows returned (up to `max_result_limit`).
         default_table_format: The default table format to return.
         read_only: A hint to provide to the model that the tool does not modify its environment.
         idempotent: A hint to provide to the model that calling the tool multiple times with the same input will always return the same result (redundant if read_only is True).
@@ -136,10 +156,10 @@ def find_rust(
 
     Example: Creating an open-world tool that reaches out to an external API. The open_world flag indicates to the model that the tool may interact with an "open world" of external entities
         @fenic_tool(tool_name="search_knowledge_base", tool_description="...", open_world=True)
-        def search_knowledge_base(
+        async def search_knowledge_base(
             query: Annotated[str, "Knowledge base search query"],
         ) -> DataFrame:
-            results = requests.get(...)
+            results = await asyncio.to_thread(requests.get, ...)
             return fc.create_dataframe(results)
 
     Notes:
@@ -149,20 +169,26 @@ def search_knowledge_base(
     - The returned object is a DynamicTool ready for registration.
     - A `limit` parameter is automatically added to the function signature, which can be used to limit the number of rows returned up to the tool's `max_result_limit`.
     - A `table_format` parameter is automatically added to the function signature, which can be used to specify the format of the returned data (markdown, structured)
+    - The `client_limit_parameter` flag can be used to control whether the client is allowed to specify a limit parameter.
     """
 
-    def decorator(func: Callable[..., Coroutine[Any, Any, DataFrame]]) -> DynamicToolDefinition:
+    def decorator(
+        func: Union[Callable[..., Coroutine[Any, Any, DataFrame]], Callable[..., DataFrame]]) -> DynamicToolDefinition:
         _ensure_no_var_args(func, func_label=tool_name)
 
         @functools.wraps(func)
         async def wrapper(*args, **kwargs) -> LogicalPlan:
-            result_df = await func(*args, **kwargs)
+            if iscoroutinefunction(func):
+                result_df = await func(*args, **kwargs)
+            else:
+                result_df = await asyncio.to_thread(lambda: func(*args, **kwargs))
             return result_df._logical_plan
 
         return DynamicToolDefinition(
             name=tool_name,
             description=tool_description,
             max_result_limit=max_result_limit,
+            add_limit_parameter=client_limit_parameter,
             default_table_format=default_table_format,
             read_only=read_only,
             idempotent=idempotent,
@@ -543,7 +569,7 @@ def _apply_paging(
 
 
 @dataclass
-class ProfileRow:
+class _ProfileRow:
     dataset_name: str
     column_name: str
     data_type: str
@@ -584,26 +610,9 @@ def _auto_generate_profile_tool(
         raise ValueError("Cannot create profile tool: no datasets provided.")
     tool_key = _sanitize_name(tool_name)
 
-    async def _materialize_dataset_description(df: DataFrame, dataset_name: str, view_name: str) -> None:
-        profile_rows = await _compute_profile_rows(df, dataset_name, topk_distinct)
-        pl_df = pl.DataFrame(profile_rows)
-        plan = InMemorySource.from_session_state(pl_df, session._session_state)
-        catalog = session._session_state.catalog
-        catalog.drop_view(view_name, ignore_if_not_exists=True)
-        catalog.create_view(view_name, plan)
-
-    async def _ensure_profile_view_for_dataset(spec: DatasetSpec, refresh: bool) -> LogicalPlan:
-        schema_hash = _schema_fingerprint(spec.df)
-        view_name = f"__fenic_profile__{tool_key}__{_sanitize_name(spec.table_name)}__{schema_hash}"
-        catalog = session._session_state.catalog
-        if refresh or not catalog.does_view_exist(view_name):
-            await _materialize_dataset_description(spec.df, spec.table_name, view_name)
-        return catalog.get_view_plan(view_name)
-
     async def profile_func(
         df_name: Annotated[
             str | None, "Optional DataFrame name to return a single profile for. To return profiles for all datasets, omit this parameter."] = None,
-        refresh: Annotated[bool, "Recompute and refresh cached profile view(s)"] = False,
     ) -> LogicalPlan:
         # sometimes the models get...very confused, and pass the null string instead of `null` or omitting the field entirely
         if not df_name or df_name == "null":
@@ -614,13 +623,12 @@ async def profile_func(
             if spec is None:
                 raise ValidationError(
                     f"Unknown dataset '{df_name}'. Available: {', '.join(d.table_name for d in datasets)}")
-            return await _ensure_profile_view_for_dataset(spec, refresh)
+            return await _ensure_profile_view_for_dataset(session, tool_key, spec, topk_distinct)
 
         # Multi-dataset: concatenate cached views (or compute & cache if missing)
         profile_df = None
         for spec in datasets:
-            # Ensure view exists and read it, then convert to polars for concatenation
-            plan = await _ensure_profile_view_for_dataset(spec, refresh)
+            plan = await _ensure_profile_view_for_dataset(session, tool_key, spec, topk_distinct)
             df = DataFrame._from_logical_plan(plan, session_state=session._session_state)
             if not profile_df:
                 profile_df = df
@@ -636,21 +644,42 @@ async def profile_func(
         max_result_limit=None,
     )
 
+async def _ensure_profile_view_for_dataset(
+    session: Session,
+    tool_key: str,
+    spec: DatasetSpec,
+    topk_distinct: int,
+) -> LogicalPlan:
+    schema_hash = _schema_fingerprint(spec.df)
+    view_name = f"__fenic_profile__{tool_key}__{_sanitize_name(spec.table_name)}__{schema_hash}"
+    catalog = session._session_state.catalog
+    if not catalog.does_view_exist(view_name):
+        profile_rows = await _compute_profile_rows(
+            spec.df,
+            spec.table_name,
+            topk_distinct,
+        )
+        view_plan = InMemorySource.from_session_state(
+            pl.DataFrame(profile_rows), session._session_state,
+        )
+        catalog.create_view(view_name, view_plan)
+    return catalog.get_view_plan(view_name)
+
 async def _compute_profile_rows(
     df: DataFrame,
     dataset_name: str,
-    topk_distinct: int
-) -> List[ProfileRow]:
+    topk_distinct: int,
+) -> List[_ProfileRow]:
     pl_df = df.to_polars()
     total_rows = pl_df.height
-    sampled_df = pl_df.sample(10000)
-    rows_list: List[ProfileRow] = []
+    sampled_df = pl_df.sample(min(total_rows, PROFILE_MAX_SAMPLE_SIZE))
+    rows_list: List[_ProfileRow] = []
     for field in df.schema.column_fields:
         col_name = field.name
         dtype_str = str(field.data_type)
         null_count = sampled_df.select(pl.col(col_name).is_null().sum()).item()
         non_null_count = sampled_df.height - null_count
-        stats: ProfileRow = ProfileRow(
+        stats = _ProfileRow(
             dataset_name=dataset_name,
             column_name=col_name,
             data_type=dtype_str,
diff --git a/src/fenic/core/mcp/_server.py b/src/fenic/core/mcp/_server.py
@@ -147,30 +147,24 @@ def _handle_result_set(
         original_result_count = len(pl_df)
         if effective_limit and original_result_count > effective_limit:
             pl_df = pl_df.limit(effective_limit)
+        schema_fields = [{"name": name, "type": str(dtype)} for name, dtype in pl_df.schema.items()]
+        rows_list = pl_df.to_dicts()
+        returned_result_count = len(rows_list)
         if table_format == "structured":
-            rows_list = pl_df.to_dicts()
-            schema_fields = [{"name": name, "type": str(dtype)} for name, dtype in pl_df.schema.items()]
             result_set = MCPResultSet(
                 table_schema=schema_fields,
                 rows=rows_list,
-                returned_result_count=len(rows_list),
+                returned_result_count=returned_result_count,
                 total_result_count=original_result_count,
             )
         else:
-            with pl.Config(
-                tbl_hide_dataframe_shape=True,
-                tbl_cols=-1,
-                tbl_rows=-1,
-                tbl_width_chars=-1,
-                fmt_str_lengths=25000 #TODO(bcallender): make this configurable
-            ):
-                rows = repr(pl_df)
-                result_set = MCPResultSet(
-                    table_schema=None,
-                    rows=rows,
-                    returned_result_count=len(pl_df),
-                    total_result_count=original_result_count,
-                )
+            rows = _render_markdown_preview(rows_list)
+            result_set = MCPResultSet(
+                table_schema=schema_fields,
+                rows=rows,
+                returned_result_count=returned_result_count,
+                total_result_count=original_result_count,
+            )
         return result_set
 
     def _build_parameterized_tool(self, tool: ParameterizedToolDefinition):
diff --git a/src/fenic/core/mcp/types.py b/src/fenic/core/mcp/types.py
@@ -1,7 +1,7 @@
 """Exported Types related to Parameterized View/MCP Tool Generation."""
 from __future__ import annotations
 
-from typing import Annotated, Callable, List, Optional, Union, Coroutine, Any
+from typing import Annotated, Any, Callable, Coroutine, List, Optional, Union
 
 from pydantic import BaseModel, ConfigDict, model_validator
 from pydantic.dataclasses import dataclass
@@ -80,16 +80,17 @@ class ParameterizedToolDefinition:
 class DynamicToolDefinition:
     """A tool implemented as a regular Python function with explicit parameters.
 
-    The function must be a `Callable[..., LogicalPlan]`. Collection/formatting is handled by
+    The function must be a `Callable[..., Coroutine[Any, Any, LogicalPlan]]`
+    (a function defined with `async def`). Collection/formatting is handled by
     the MCP generator wrapper.
     """
     name: str
     description: str
     max_result_limit: Optional[int]
     func: Callable[..., Coroutine[Any, Any, LogicalPlan]]
-    add_limit_parameter: bool = True
+    add_limit_parameter: bool = True 
     default_table_format: TableFormat = "markdown"
-    read_only: Annotated[bool, "A hint to provide to the model that the tool is read-only."] = True
-    idempotent: Annotated[bool, "A hint to provide to the model that the tool is idempotent."] = True
-    destructive: Annotated[bool, "A hint to provide to the model that the tool is destructive."] = False
-    open_world: Annotated[bool, "A hint to provide to the model that the tool reaches out to external endpoints/knowledge bases."] = False
+    read_only: Annotated[bool, "A hint to provide to the client that the tool is read-only."] = True
+    idempotent: Annotated[bool, "A hint to provide to the client that the tool is idempotent."] = True
+    destructive: Annotated[bool, "A hint to provide to the client that the tool is destructive."] = False
+    open_world: Annotated[bool, "A hint to provide to the client that the tool reaches out to external endpoints/knowledge bases."] = False
diff --git a/tests/api/mcp/test_tool_generation.py b/tests/api/mcp/test_tool_generation.py
@@ -1,3 +1,4 @@
+import asyncio
 import inspect
 
 import pytest
@@ -55,7 +56,32 @@ def test_auto_generate_core_tools_from_tables_builds_tools(local_session):
     assert set(pl_df.columns) == {"dataset", "schema"}
     assert sorted(pl_df.get_column("dataset").to_list()) == ["t1", "t2"]
 
-def test_fenic_tool_decorator(local_session: Session):
+def test_fenic_tool_decorator_sync(local_session: Session):
+
+    @fenic_tool(tool_name="test", tool_description="test", max_result_limit=100, default_table_format="markdown")
+    def test_sync(numbers: list[int]):
+        return local_session.create_dataframe({"numbers": numbers})
+
+    assert test_sync.max_result_limit == 100
+    assert test_sync.default_table_format == "markdown"
+    assert isinstance(test_sync, DynamicToolDefinition)
+    assert callable(test_sync.func)
+    # underlying function is synchronous, but we want the mcp
+    # function wrapping it to be async
+    assert inspect.iscoroutinefunction(test_sync.func)
+    func_signature = inspect.signature(test_sync.func)
+    assert len(func_signature.parameters) == 1
+    assert "numbers" in func_signature.parameters
+    # limit/table_format are added by the MCP server wrapper, so should not be in the raw function signature
+    assert "limit" not in func_signature.parameters
+    assert "table_format" not in func_signature.parameters
+
+    test_sync = asyncio.run(test_sync.func(list(range(100))))
+    pl_df, _ = local_session._session_state.execution.collect(test_sync)
+    assert pl_df.get_column("numbers").to_list() == list(range(100))
+
+def test_fenic_tool_decorator_async(local_session: Session):
+
     @fenic_tool(tool_name="test", tool_description="test", max_result_limit=100, default_table_format="markdown")
     async def test(numbers: list[int]):
         return local_session.create_dataframe({"numbers": numbers})
@@ -64,9 +90,14 @@ async def test(numbers: list[int]):
     assert test.default_table_format == "markdown"
     assert isinstance(test, DynamicToolDefinition)
     assert callable(test.func)
+    assert inspect.iscoroutinefunction(test.func)
     func_signature = inspect.signature(test.func)
     assert len(func_signature.parameters) == 1
     assert "numbers" in func_signature.parameters
     # limit/table_format are added by the MCP server wrapper, so should not be in the raw function signature
     assert "limit" not in func_signature.parameters
     assert "table_format" not in func_signature.parameters
+
+    test_async = asyncio.run(test.func(list(range(100))))
+    pl_df, _ = local_session._session_state.execution.collect(test_async)
+    assert pl_df.get_column("numbers").to_list() == list(range(100))
diff --git a/tools/agentic_mcp_evaluation/mcp_evaluation_report_gpt_5_mini.md b/tools/agentic_mcp_evaluation/mcp_evaluation_report_gpt_5_mini.md
diff --git a/tools/agentic_mcp_evaluation/mcp_to_evaluate.py b/tools/agentic_mcp_evaluation/mcp_to_evaluate.py