feat: add plugin for returning GenAI Parts from tools into the model request

RKest · copybara-github · commit 116b26c33e16 · 2025-11-09T11:47:08.000-08:00
Added to mitigate #3064.

Co-authored-by: Max Ind <maxind@google.com>
PiperOrigin-RevId: 830135940
diff --git a/contributing/samples/multimodal_tool_results/__init__.py b/contributing/samples/multimodal_tool_results/__init__.py
@@ -0,0 +1,15 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import agent
diff --git a/contributing/samples/multimodal_tool_results/agent.py b/contributing/samples/multimodal_tool_results/agent.py
@@ -0,0 +1,41 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from google.adk.agents import LlmAgent
+from google.adk.apps.app import App
+from google.adk.plugins.multimodal_tool_results_plugin import MultimodalToolResultsPlugin
+from google.genai import types
+
+# Name passed to the `App` constructed at the bottom of this module.
+APP_NAME = "multimodal_tool_results"
+# Sample user id; not referenced anywhere else in this module.
+USER_ID = "test_user"
+
+
+def get_image():
+  """Returns the image that the agent should describe."""
+  # The result is a one-element list of GenAI parts that references the image
+  # by URI. The `gs://` value is a placeholder: replace it with the URI of a
+  # real image before running the sample.
+  return [types.Part.from_uri(file_uri="gs://replace_with_your_image_uri")]
+
+
+# Agent whose only tool is `get_image`; its instruction tells the model to call
+# that tool on every user message and describe the image it returns.
+root_agent = LlmAgent(
+    name="image_describing_agent",
+    description="image describing agent",
+    instruction="""Whatever the user says, get the image using the get_image tool, and describe it.""",
+    model="gemini-2.0-flash",
+    tools=[get_image],
+)
+
+
+# Wrap the agent in an App so that MultimodalToolResultsPlugin is installed.
+# The plugin takes the parts returned by `get_image` and appends them to the
+# next model request.
+app = App(
+    name=APP_NAME,
+    root_agent=root_agent,
+    plugins=[MultimodalToolResultsPlugin()],
+)
diff --git a/src/google/adk/plugins/multimodal_tool_results_plugin.py b/src/google/adk/plugins/multimodal_tool_results_plugin.py
@@ -0,0 +1,90 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from typing import Any
+from typing import Optional
+
+from google.genai import types
+
+from ..agents.callback_context import CallbackContext
+from ..models.llm_request import LlmRequest
+from ..models.llm_response import LlmResponse
+from ..tools.base_tool import BaseTool
+from ..tools.tool_context import ToolContext
+from .base_plugin import BasePlugin
+
+# State key under which parts returned by tools are accumulated until they are
+# attached to the next model request.
+# NOTE(review): the `temp:` prefix presumably marks this entry as temporary
+# state -- confirm against the ADK State documentation.
+PARTS_RETURNED_BY_TOOLS_ID = "temp:PARTS_RETURNED_BY_TOOLS_ID"
+
+
+class MultimodalToolResultsPlugin(BasePlugin):
+  """A plugin that lets function tools return GenAI parts directly.
+
+  `after_tool_callback` saves any `google.genai.types.Part` objects returned by
+  a tool in state under `PARTS_RETURNED_BY_TOOLS_ID`; `before_model_callback`
+  then appends the saved parts to the last content of the next LLM request.
+
+  Should be removed in favor of directly supporting FunctionResponsePart when these
+  are supported outside of computer use tool.
+  For context see: https://github.com/google/adk-python/issues/3064#issuecomment-3463067459
+  """
+
+  def __init__(self, name: str = "multimodal_tool_results_plugin"):
+    """Initialize the multimodal tool results plugin.
+
+    Args:
+      name: The name of the plugin instance.
+    """
+    super().__init__(name)
+
+  async def after_tool_callback(
+      self,
+      *,
+      tool: BaseTool,
+      tool_args: dict[str, Any],
+      tool_context: ToolContext,
+      result: dict,
+  ) -> Optional[dict]:
+    """Saves parts returned by the tool in the tool context's state.
+
+    The saved parts are later attached to the LLM request as-is by
+    `before_model_callback`.
+
+    Args:
+      tool: The tool that produced `result`. Not used.
+      tool_args: The arguments the tool was called with. Not used.
+      tool_context: Context whose `state` accumulates the returned parts.
+      result: The value returned by the tool.
+
+    Returns:
+      `None` when `result` is a `google.genai.types.Part` or a non-empty list
+      made up only of `google.genai.types.Part` objects (the parts are saved
+      to state); otherwise `result` itself, unchanged.
+    """
+    if isinstance(result, types.Part):
+      parts = [result]
+    elif (
+        isinstance(result, list)
+        and result
+        # Check every element, not only the first: a mixed list would
+        # otherwise put non-Part objects into the model request.
+        and all(isinstance(item, types.Part) for item in result)
+    ):
+      # Copy so the list owned by the tool is never aliased by state.
+      parts = list(result)
+    else:
+      return result
+
+    # Append to whatever earlier tool calls have already saved.
+    previously_saved = (
+        tool_context.state.get(PARTS_RETURNED_BY_TOOLS_ID, None) or []
+    )
+    tool_context.state[PARTS_RETURNED_BY_TOOLS_ID] = previously_saved + parts
+
+    return None
+
+  async def before_model_callback(
+      self, *, callback_context: CallbackContext, llm_request: LlmRequest
+  ) -> Optional[LlmResponse]:
+    """Attaches parts saved by `after_tool_callback` to `llm_request`.
+
+    The saved parts are appended to the parts of the last content in
+    `llm_request.contents`, and the saved list is then reset to empty so that
+    each part is attached only once.
+
+    Args:
+      callback_context: Context whose `state` holds the saved parts.
+      llm_request: The request about to be sent to the model; modified in place.
+
+    Returns:
+      Always `None`.
+    """
+    saved_parts = callback_context.state.get(PARTS_RETURNED_BY_TOOLS_ID, None)
+    if not saved_parts:
+      return None
+
+    if not llm_request.contents:
+      # There is no content to attach to. Keep the parts saved for a later
+      # request instead of raising IndexError or silently dropping them.
+      return None
+
+    last_content = llm_request.contents[-1]
+    if last_content.parts is None:
+      # A content may carry no parts list at all; `+=` on None would raise.
+      last_content.parts = list(saved_parts)
+    else:
+      last_content.parts += saved_parts
+    callback_context.state.update({PARTS_RETURNED_BY_TOOLS_ID: []})
+
+    return None
diff --git a/tests/unittests/plugins/test_multimodal_tool_results_plugin.py b/tests/unittests/plugins/test_multimodal_tool_results_plugin.py
@@ -0,0 +1,154 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from typing import Any
+from unittest.mock import Mock
+
+from google.adk.agents.base_agent import BaseAgent
+from google.adk.agents.callback_context import CallbackContext
+from google.adk.models.llm_request import LlmRequest
+from google.adk.plugins.multimodal_tool_results_plugin import MultimodalToolResultsPlugin
+from google.adk.plugins.multimodal_tool_results_plugin import PARTS_RETURNED_BY_TOOLS_ID
+from google.adk.tools.base_tool import BaseTool
+from google.adk.tools.tool_context import ToolContext
+from google.genai import types
+import pytest
+
+from .. import testing_utils
+
+
+@pytest.fixture
+def plugin() -> MultimodalToolResultsPlugin:
+  """Provide a plugin instance constructed with its default name."""
+  default_plugin = MultimodalToolResultsPlugin()
+  return default_plugin
+
+
+@pytest.fixture
+def mock_tool() -> Mock:
+  """Create a mock tool for testing.
+
+  The mock is specced against `BaseTool`.
+  """
+  return Mock(spec=BaseTool)
+
+
+@pytest.fixture
+async def tool_context() -> ToolContext:
+  """Build a ToolContext on top of a test invocation context with a mock agent."""
+  mock_agent = Mock(spec=BaseAgent)
+  invocation_context = await testing_utils.create_invocation_context(
+      agent=mock_agent
+  )
+  return ToolContext(invocation_context=invocation_context)
+
+
+@pytest.mark.asyncio
+async def test_tool_returning_parts_are_added_to_llm_request(
+    plugin: MultimodalToolResultsPlugin,
+    mock_tool: Mock,
+    tool_context: ToolContext,
+):
+  """Test that parts returned by a tool are present in the llm_request later."""
+  parts = [types.Part(text="part1"), types.Part(text="part2")]
+
+  result = await plugin.after_tool_callback(
+      tool=mock_tool,
+      tool_args={},
+      tool_context=tool_context,
+      result=parts,
+  )
+
+  # The callback returns None for a list of parts and saves them in state.
+  assert result is None
+  assert PARTS_RETURNED_BY_TOOLS_ID in tool_context.state
+  assert tool_context.state[PARTS_RETURNED_BY_TOOLS_ID] == parts
+
+  # Share the same state object between the tool and the model callbacks.
+  callback_context = Mock(spec=CallbackContext)
+  callback_context.state = tool_context.state
+  llm_request = LlmRequest(contents=[types.Content(parts=[])])
+
+  await plugin.before_model_callback(
+      callback_context=callback_context, llm_request=llm_request
+  )
+
+  assert llm_request.contents[-1].parts == parts
+  # The saved parts are reset so they are not attached to a later request.
+  assert tool_context.state[PARTS_RETURNED_BY_TOOLS_ID] == []
+
+
+@pytest.mark.asyncio
+async def test_tool_returning_non_list_of_parts_is_unchanged(
+    plugin: MultimodalToolResultsPlugin,
+    mock_tool: Mock,
+    tool_context: ToolContext,
+):
+  """Test that a tool result that is not a list of parts is left unchanged."""
+  original_result = {"some": "data"}
+
+  result = await plugin.after_tool_callback(
+      tool=mock_tool,
+      tool_args={},
+      tool_context=tool_context,
+      result=original_result,
+  )
+
+  # The result is handed back as-is and nothing is saved in state.
+  assert result == original_result
+  assert PARTS_RETURNED_BY_TOOLS_ID not in tool_context.state
+
+  callback_context = Mock(spec=CallbackContext)
+  callback_context.state = tool_context.state
+  llm_request = LlmRequest(
+      contents=[types.Content(parts=[types.Part(text="original")])]
+  )
+  original_parts = list(llm_request.contents[-1].parts)
+
+  await plugin.before_model_callback(
+      callback_context=callback_context, llm_request=llm_request
+  )
+
+  # With nothing saved, the request must not be modified.
+  assert llm_request.contents[-1].parts == original_parts
+
+
+@pytest.mark.asyncio
+async def test_multiple_tools_returning_parts_are_accumulated(
+    plugin: MultimodalToolResultsPlugin,
+    mock_tool: Mock,
+    tool_context: ToolContext,
+):
+  """Test that parts from multiple tool calls are accumulated."""
+  parts1 = [types.Part(text="part1")]
+  parts2 = [types.Part(text="part2")]
+
+  await plugin.after_tool_callback(
+      tool=mock_tool,
+      tool_args={},
+      tool_context=tool_context,
+      result=parts1,
+  )
+
+  await plugin.after_tool_callback(
+      tool=mock_tool,
+      tool_args={},
+      tool_context=tool_context,
+      result=parts2,
+  )
+
+  # Both results are kept, in call order.
+  assert PARTS_RETURNED_BY_TOOLS_ID in tool_context.state
+  assert tool_context.state[PARTS_RETURNED_BY_TOOLS_ID] == parts1 + parts2
+
+  callback_context = Mock(spec=CallbackContext)
+  callback_context.state = tool_context.state
+  llm_request = LlmRequest(contents=[types.Content(parts=[])])
+
+  await plugin.before_model_callback(
+      callback_context=callback_context, llm_request=llm_request
+  )
+
+  assert llm_request.contents[-1].parts == parts1 + parts2