Skip to content

Commit 5d9d16a

Browse files
committed
make automated tools async all the way down, address comments, revamp profile system to be easier to use for agents
1 parent 397a144 commit 5d9d16a

File tree

7 files changed

+126
-2324
lines changed

7 files changed

+126
-2324
lines changed

docs/topics/fenic-mcp.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ session.catalog.drop_tool("users_by_name_regex", ignore_if_not_exists=True)
140140

141141
### Step 2b: Create dynamic tools with `@fenic_tool`
142142

143-
Dynamic tools let you expose arbitrary Python logic as an MCP tool. They are defined with the `@fenic_tool` decorator and must return a Fenic `DataFrame`. Annotate parameters with `typing_extensions.Annotated` to provide per-argument descriptions in the tool schema. The server automatically adds `limit` and `table_format` keyword-only parameters for limiting the size of result sets and output formatting.
143+
Dynamic tools let you expose arbitrary Python logic as an MCP tool. They are defined with the `@fenic_tool` decorator and must return a Fenic `DataFrame`. Annotate parameters with `typing_extensions.Annotated` to provide per-argument descriptions in the tool schema. The server automatically adds `limit` and `table_format` keyword-only parameters for limiting the size of result sets and output formatting -- if the tool handles its own limiting, set `client_limit_parameter` to `False` to disable this behavior. The wrapped function can be async (recommended) or synchronous.
144144

145145
```python
146146
from typing_extensions import Annotated
@@ -178,7 +178,7 @@ orders_total = orders.group_by("user_id").agg(
178178
max_result_limit=100,
179179
default_table_format="markdown",
180180
)
181-
def users_with_min_spend(
181+
async def users_with_min_spend(
182182
name_regex: Annotated[Optional[str], "Regex for user name (use (?i) for case-insensitive)"] = None,
183183
min_total: Annotated[float, "Minimum total order amount"],
184184
) -> DataFrame:

src/fenic/api/mcp/tool_generation.py

Lines changed: 73 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -15,20 +15,28 @@
1515
import functools
1616
import hashlib
1717
import inspect
18-
import json
1918
import re
20-
from dataclasses import dataclass, asdict
21-
from typing import Callable, Dict, List, Literal, Optional, TypedDict, Union, Coroutine, Any
19+
from dataclasses import dataclass
20+
from inspect import iscoroutinefunction
21+
from typing import (
22+
Any,
23+
Callable,
24+
Coroutine,
25+
Dict,
26+
List,
27+
Literal,
28+
Optional,
29+
Union,
30+
)
2231

23-
from fastmcp.server.context import Context
2432
import polars as pl
33+
from fastmcp.server.context import Context
2534
from typing_extensions import Annotated
2635

2736
from fenic.api.dataframe.dataframe import DataFrame
2837
from fenic.api.functions import (
2938
avg,
3039
col,
31-
count,
3240
stddev,
3341
)
3442
from fenic.api.functions import max as max_
@@ -47,6 +55,8 @@
4755
StringType,
4856
)
4957

58+
PROFILE_MAX_SAMPLE_SIZE = 10_000
59+
5060

5161
@dataclass
5262
class DatasetSpec:
@@ -101,18 +111,28 @@ def fenic_tool(
101111
tool_name: str,
102112
tool_description: str,
103113
max_result_limit: Optional[int] = None,
114+
client_limit_parameter: bool = True,
104115
default_table_format: TableFormat = "markdown",
105116
read_only: bool = True,
106117
idempotent: bool = True,
107118
destructive: bool = False,
108119
open_world: bool = False,
109-
) -> Callable[[Callable[..., Coroutine[Any, Any, DataFrame]]], DynamicToolDefinition]:
120+
) -> Callable[[
121+
Union[
122+
Callable[..., Coroutine[Any, Any, DataFrame]],
123+
Callable[..., DataFrame]
124+
]], DynamicToolDefinition]:
110125
"""Decorator to bind a DataFrame to a user-authored tool function.
111126
127+
Can be added to a synchronous or asynchronous (recommended) tool function.
128+
Function based tools (dynamic tools) cannot be persisted to the catalog.
129+
See the [Fenic MCP documentation](https://fenic.ai/docs/topics/fenic-mcp) for more details.
130+
112131
Args:
113132
tool_name: The name of the tool.
114133
tool_description: The description of the tool.
115-
max_result_limit: The maximum number of results to return.
134+
max_result_limit: The maximum number of results to return. If omitted, no limit will be enforced.
135+
client_limit_parameter: Whether to add a client-side limit parameter to the tool.
116136
default_table_format: The default table format to return.
117137
read_only: A hint to provide to the model that the tool does not modify its environment.
118138
idempotent: A hint to provide to the model that calling the tool multiple times with the same input will always return the same result (redundant if read_only is True).
@@ -136,10 +156,10 @@ def find_rust(
136156
137157
Example: Creating an open-world tool that reaches out to an external API. The open_world flag indicates to the model that the tool may interact with an "open world" of external entities
138158
@fenic_tool(tool_name="search_knowledge_base", tool_description="...", open_world=True)
139-
def search_knowledge_base(
159+
async def search_knowledge_base(
140160
query: Annotated[str, "Knowledge base search query"],
141161
) -> DataFrame:
142-
results = requests.get(...)
162+
results = await async_http_client.get(...)  # NOTE(review): `requests.get` is synchronous and not awaitable — use an async client (e.g. httpx/aiohttp)
143163
return fc.create_dataframe(results)
144164
145165
Notes:
@@ -149,20 +169,26 @@ def search_knowledge_base(
149169
- The returned object is a DynamicTool ready for registration.
150170
- A `limit` parameter is automatically added to the function signature, which can be used to limit the number of rows returned up to the tool's `max_result_limit`.
151171
- A `table_format` parameter is automatically added to the function signature, which can be used to specify the format of the returned data (markdown, structured)
172+
- The `client_limit_parameter` flag can be used to control whether the client is allowed to specify a limit parameter.
152173
"""
153174

154-
def decorator(func: Callable[..., Coroutine[Any, Any, DataFrame]]) -> DynamicToolDefinition:
175+
def decorator(
176+
func: Union[Callable[..., Coroutine[Any, Any, DataFrame]], Callable[..., DataFrame]]) -> DynamicToolDefinition:
155177
_ensure_no_var_args(func, func_label=tool_name)
156178

157179
@functools.wraps(func)
158180
async def wrapper(*args, **kwargs) -> LogicalPlan:
159-
result_df = await func(*args, **kwargs)
181+
if iscoroutinefunction(func):
182+
result_df = await func(*args, **kwargs)
183+
else:
184+
result_df = await asyncio.to_thread(lambda: func(*args, **kwargs))
160185
return result_df._logical_plan
161186

162187
return DynamicToolDefinition(
163188
name=tool_name,
164189
description=tool_description,
165190
max_result_limit=max_result_limit,
191+
add_limit_parameter=client_limit_parameter,
166192
default_table_format=default_table_format,
167193
read_only=read_only,
168194
idempotent=idempotent,
@@ -296,7 +322,7 @@ async def search_summary(
296322
)
297323

298324

299-
def auto_generate_search_content_tool(
325+
def _auto_generate_search_content_tool(
300326
datasets: List[DatasetSpec],
301327
session: Session,
302328
tool_name: str,
@@ -464,13 +490,10 @@ async def analyze_func(
464490
"- For text search, prefer regular expressions using REGEXP_MATCHES().\n",
465491
"- Paging: use ORDER BY to define row order, then LIMIT and OFFSET for pages.\n",
466492
f"- Results are limited to {result_limit} rows, use LIMIT/OFFSET to paginate when receiving a result set of {result_limit} or more rows.\n",
467-
"Examples:\n", # nosec B608 - example text only
468-
f"- SELECT * FROM {{{example_name}}} WHERE REGEXP_MATCHES(message, '(?i)error|fail') LIMIT {result_limit}",
469-
# nosec B608 - example text only
470-
f"- SELECT dept, COUNT(*) AS n FROM {{{example_name}}} WHERE status = 'active' GROUP BY dept HAVING n > 10 ORDER BY n DESC LIMIT {result_limit}",
471-
# nosec B608 - example text only
472-
f"- Paging: page 2 of size {result_limit}\n SELECT * FROM {{{example_name}}} ORDER BY created_at DESC LIMIT {result_limit} OFFSET {result_limit}",
473-
# nosec B608 - example text only
493+
"Examples:\n",
494+
f"- SELECT * FROM {{{example_name}}} WHERE REGEXP_MATCHES(message, '(?i)error|fail') LIMIT {result_limit}", # nosec B608 - example text only
495+
f"- SELECT dept, COUNT(*) AS n FROM {{{example_name}}} WHERE status = 'active' GROUP BY dept HAVING n > 10 ORDER BY n DESC LIMIT {result_limit}", # nosec B608 - example text only
496+
f"- Paging: page 2 of size {result_limit}\n SELECT * FROM {{{example_name}}} ORDER BY created_at DESC LIMIT {result_limit} OFFSET {result_limit}", # nosec B608 - example text only
474497
]
475498
)
476499
enhanced_description = "\n".join(lines)
@@ -543,7 +566,7 @@ def _apply_paging(
543566

544567

545568
@dataclass
546-
class ProfileRow:
569+
class _ProfileRow:
547570
dataset_name: str
548571
column_name: str
549572
data_type: str
@@ -584,26 +607,9 @@ def _auto_generate_profile_tool(
584607
raise ValueError("Cannot create profile tool: no datasets provided.")
585608
tool_key = _sanitize_name(tool_name)
586609

587-
async def _materialize_dataset_description(df: DataFrame, dataset_name: str, view_name: str) -> None:
588-
profile_rows = await _compute_profile_rows(df, dataset_name, topk_distinct)
589-
pl_df = pl.DataFrame(profile_rows)
590-
plan = InMemorySource.from_session_state(pl_df, session._session_state)
591-
catalog = session._session_state.catalog
592-
catalog.drop_view(view_name, ignore_if_not_exists=True)
593-
catalog.create_view(view_name, plan)
594-
595-
async def _ensure_profile_view_for_dataset(spec: DatasetSpec, refresh: bool) -> LogicalPlan:
596-
schema_hash = _schema_fingerprint(spec.df)
597-
view_name = f"__fenic_profile__{tool_key}__{_sanitize_name(spec.table_name)}__{schema_hash}"
598-
catalog = session._session_state.catalog
599-
if refresh or not catalog.does_view_exist(view_name):
600-
await _materialize_dataset_description(spec.df, spec.table_name, view_name)
601-
return catalog.get_view_plan(view_name)
602-
603610
async def profile_func(
604611
df_name: Annotated[
605612
str | None, "Optional DataFrame name to return a single profile for. To return profiles for all datasets, omit this parameter."] = None,
606-
refresh: Annotated[bool, "Recompute and refresh cached profile view(s)"] = False,
607613
) -> LogicalPlan:
608614
# sometimes the models get...very confused, and pass the null string instead of `null` or omitting the field entirely
609615
if not df_name or df_name == "null":
@@ -614,13 +620,12 @@ async def profile_func(
614620
if spec is None:
615621
raise ValidationError(
616622
f"Unknown dataset '{df_name}'. Available: {', '.join(d.table_name for d in datasets)}")
617-
return await _ensure_profile_view_for_dataset(spec, refresh)
623+
return await _ensure_profile_view_for_dataset(session, tool_key, spec, topk_distinct)
618624

619625
# Multi-dataset: concatenate cached views (or compute & cache if missing)
620626
profile_df = None
621627
for spec in datasets:
622-
# Ensure view exists and read it, then convert to polars for concatenation
623-
plan = await _ensure_profile_view_for_dataset(spec, refresh)
628+
plan = await _ensure_profile_view_for_dataset(session, tool_key, spec, topk_distinct)
624629
df = DataFrame._from_logical_plan(plan, session_state=session._session_state)
625630
if not profile_df:
626631
profile_df = df
@@ -636,21 +641,42 @@ async def profile_func(
636641
max_result_limit=None,
637642
)
638643

644+
async def _ensure_profile_view_for_dataset(
645+
session: Session,
646+
tool_key: str,
647+
spec: DatasetSpec,
648+
topk_distinct: int,
649+
) -> LogicalPlan:
650+
schema_hash = _schema_fingerprint(spec.df)
651+
view_name = f"__fenic_profile__{tool_key}__{_sanitize_name(spec.table_name)}__{schema_hash}"
652+
catalog = session._session_state.catalog
653+
if not catalog.does_view_exist(view_name):
654+
profile_rows = await _compute_profile_rows(
655+
spec.df,
656+
spec.table_name,
657+
topk_distinct,
658+
)
659+
view_plan = InMemorySource.from_session_state(
660+
pl.DataFrame(profile_rows), session._session_state,
661+
)
662+
catalog.create_view(view_name, view_plan)
663+
return catalog.get_view_plan(view_name)
664+
639665
async def _compute_profile_rows(
640666
df: DataFrame,
641667
dataset_name: str,
642-
topk_distinct: int
643-
) -> List[ProfileRow]:
668+
topk_distinct: int,
669+
) -> List[_ProfileRow]:
644670
pl_df = df.to_polars()
645671
total_rows = pl_df.height
646-
sampled_df = pl_df.sample(10000)
647-
rows_list: List[ProfileRow] = []
672+
sampled_df = pl_df.sample(min(total_rows, PROFILE_MAX_SAMPLE_SIZE))
673+
rows_list: List[_ProfileRow] = []
648674
for field in df.schema.column_fields:
649675
col_name = field.name
650676
dtype_str = str(field.data_type)
651677
null_count = sampled_df.select(pl.col(col_name).is_null().sum()).item()
652678
non_null_count = sampled_df.height - null_count
653-
stats: ProfileRow = ProfileRow(
679+
stats = _ProfileRow(
654680
dataset_name=dataset_name,
655681
column_name=col_name,
656682
data_type=dtype_str,
@@ -815,8 +841,7 @@ def _auto_generate_core_tools(
815841
"Return dataset data profile: row_count and per-column stats for any or all of the datasets listed below.",
816842
"This call should be used as a follow-up after calling the `Schema` tool.",
817843
"Numeric stats: min/max/mean/std; Booleans: true/false counts; Strings: distinct_count and top values.",
818-
"Results are cached per tool name and schema fingerprint; pass refresh=true to recompute.",
819-
"Profiles statistics are calculated across a sample of the original dataset.",
844+
"Profiling statistics are calculated across a sample of the original dataset.",
820845
"Available Datasets:",
821846
group_desc,
822847
]),
@@ -845,7 +870,7 @@ def _auto_generate_core_tools(
845870
group_desc,
846871
]),
847872
)
848-
search_content_tool = auto_generate_search_content_tool(
873+
search_content_tool = _auto_generate_search_content_tool(
849874
datasets,
850875
session,
851876
tool_name=f"{tool_group_name} - Search Content",

src/fenic/core/mcp/_server.py

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -147,30 +147,24 @@ def _handle_result_set(
147147
original_result_count = len(pl_df)
148148
if effective_limit and original_result_count > effective_limit:
149149
pl_df = pl_df.limit(effective_limit)
150+
schema_fields = [{"name": name, "type": str(dtype)} for name, dtype in pl_df.schema.items()]
151+
rows_list = pl_df.to_dicts()
152+
returned_result_count = len(rows_list)
150153
if table_format == "structured":
151-
rows_list = pl_df.to_dicts()
152-
schema_fields = [{"name": name, "type": str(dtype)} for name, dtype in pl_df.schema.items()]
153154
result_set = MCPResultSet(
154155
table_schema=schema_fields,
155156
rows=rows_list,
156-
returned_result_count=len(rows_list),
157+
returned_result_count=returned_result_count,
157158
total_result_count=original_result_count,
158159
)
159160
else:
160-
with pl.Config(
161-
tbl_hide_dataframe_shape=True,
162-
tbl_cols=-1,
163-
tbl_rows=-1,
164-
tbl_width_chars=-1,
165-
fmt_str_lengths=25000 #TODO(bcallender): make this configurable
166-
):
167-
rows = repr(pl_df)
168-
result_set = MCPResultSet(
169-
table_schema=None,
170-
rows=rows,
171-
returned_result_count=len(pl_df),
172-
total_result_count=original_result_count,
173-
)
161+
rows = _render_markdown_preview(rows_list)
162+
result_set = MCPResultSet(
163+
table_schema=schema_fields,
164+
rows=rows,
165+
returned_result_count=returned_result_count,
166+
total_result_count=original_result_count,
167+
)
174168
return result_set
175169

176170
def _build_parameterized_tool(self, tool: ParameterizedToolDefinition):

src/fenic/core/mcp/types.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Exported Types related to Parameterized View/MCP Tool Generation."""
22
from __future__ import annotations
33

4-
from typing import Annotated, Callable, List, Optional, Union, Coroutine, Any
4+
from typing import Annotated, Any, Callable, Coroutine, List, Optional, Union
55

66
from pydantic import BaseModel, ConfigDict, model_validator
77
from pydantic.dataclasses import dataclass
@@ -80,16 +80,17 @@ class ParameterizedToolDefinition:
8080
class DynamicToolDefinition:
8181
"""A tool implemented as a regular Python function with explicit parameters.
8282
83-
The function must be a `Callable[..., LogicalPlan]`. Collection/formatting is handled by
83+
The function must be a `Callable[..., Coroutine[Any, Any, LogicalPlan]]`
84+
(a function defined with `async def`). Collection/formatting is handled by
8485
the MCP generator wrapper.
8586
"""
8687
name: str
8788
description: str
8889
max_result_limit: Optional[int]
8990
func: Callable[..., Coroutine[Any, Any, LogicalPlan]]
90-
add_limit_parameter: bool = True
91+
add_limit_parameter: bool = True
9192
default_table_format: TableFormat = "markdown"
92-
read_only: Annotated[bool, "A hint to provide to the model that the tool is read-only."] = True
93-
idempotent: Annotated[bool, "A hint to provide to the model that the tool is idempotent."] = True
94-
destructive: Annotated[bool, "A hint to provide to the model that the tool is destructive."] = False
95-
open_world: Annotated[bool, "A hint to provide to the model that the tool reaches out to external endpoints/knowledge bases."] = False
93+
read_only: Annotated[bool, "A hint to provide to the client that the tool is read-only."] = True
94+
idempotent: Annotated[bool, "A hint to provide to the client that the tool is idempotent."] = True
95+
destructive: Annotated[bool, "A hint to provide to the client that the tool is destructive."] = False
96+
open_world: Annotated[bool, "A hint to provide to the client that the tool reaches out to external endpoints/knowledge bases."] = False

0 commit comments

Comments
 (0)