59 changes: 53 additions & 6 deletions pydantic_ai_slim/pydantic_ai/models/google.py
@@ -21,6 +21,7 @@
    BuiltinToolReturnPart,
    FilePart,
    FileUrl,
    DocumentUrl,
    FinishReason,
    ModelMessage,
    ModelRequest,
@@ -91,6 +92,13 @@
        'you can use the `google` optional group — `pip install "pydantic-ai-slim[google]"`'
    ) from _import_error


class Test():
    def __init__(self, num):
        self.num = num
    def multiply(self):
        return self.num * 3
Collaborator: This seems unrelated

Contributor Author: Removed


LatestGoogleModelNames = Literal[
    'gemini-2.0-flash',
    'gemini-2.0-flash-lite',
@@ -567,17 +575,34 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]:
                if isinstance(item, str):
                    content.append({'text': item})
                elif isinstance(item, BinaryContent):
                    inline_data_dict: BlobDict = {'data': item.data, 'mime_type': item.media_type}
                    part_dict: PartDict = {'inline_data': inline_data_dict}
                    if item.vendor_metadata:
                        part_dict['video_metadata'] = cast(VideoMetadataDict, item.vendor_metadata)
                    content.append(part_dict)
                    if self._is_text_like_media_type(item.media_type):
Collaborator: We need tests for this behavior like we have in test_openai.py

Contributor Author: Check the function test_google_model_json_document_url_input in test_google.py. That should work.
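
For context, a rough sketch of the shape such a test might take (the provider fixture, model name, and document URL here are assumptions; the actual test lives in tests/models/test_google.py):

import pytest

from pydantic_ai import Agent
from pydantic_ai.messages import DocumentUrl
from pydantic_ai.models.google import GoogleModel


@pytest.mark.anyio
async def test_google_model_json_document_url_input(google_provider):  # assumed provider fixture
    # A JSON document is text-like, so it should be inlined as a text part rather than as inline_data.
    model = GoogleModel('gemini-2.0-flash', provider=google_provider)
    agent = Agent(model)

    document_url = DocumentUrl(url='https://example.com/data.json')  # illustrative URL

    result = await agent.run(['What is in this JSON document?', document_url])
    assert result.output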

Collaborator: We need to use the same _inline_text_file_part we use in OpenAI, so that the text is properly formatted as representing a file.

I suggest moving it to a method on BinaryContent that returns the text with the fencing.

_is_text_like_media_type can become a method on BinaryContent and DocumentUrl as well.

When we check isinstance(item, DocumentUrl) and then do downloaded_text = await download_item(item, data_format='text'), we can create a BinaryContent from the result of download_item, and then call the new inline_text_file method on it.
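
A minimal sketch of that suggestion (illustrative only: BinaryContent does not have these methods in this PR, and the field names are assumed):

from dataclasses import dataclass


@dataclass
class BinaryContentSketch:
    """Stand-in for BinaryContent, showing the two proposed helpers."""

    data: bytes
    media_type: str
    identifier: str

    def is_text_like_media_type(self) -> bool:
        # Same predicate as the static _is_text_like_media_type below, expressed as a method.
        return (
            self.media_type.startswith('text/')
            or self.media_type in ('application/json', 'application/xml', 'application/x-yaml', 'application/yaml')
            or self.media_type.endswith(('+json', '+xml'))
        )

    def inline_text_file(self) -> str:
        # Decode the bytes and wrap them in the same BEGIN/END FILE fencing used by
        # _inline_text_file_part, so the model sees the text as a named file.
        return '\n'.join(
            [
                f'-----BEGIN FILE id="{self.identifier}" type="{self.media_type}"-----',
                self.data.decode('utf-8'),
                f'-----END FILE id="{self.identifier}"-----',
            ]
        )

The DocumentUrl branch could then wrap the result of download_item(item, data_format='text') in a BinaryContent and append {'text': content.inline_text_file()} instead of the raw downloaded string.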

                        content.append({'text': item.data.decode('utf-8')})
                    else:
                        inline_data_dict: BlobDict = {'data': item.data, 'mime_type': item.media_type}
                        part_dict: PartDict = {'inline_data': inline_data_dict}
                        if item.vendor_metadata:
                            part_dict['video_metadata'] = cast(VideoMetadataDict, item.vendor_metadata)
                        content.append(part_dict)

                elif isinstance(item, DocumentUrl):
                    if self._is_text_like_media_type(item.media_type):
                        downloaded_text = await download_item(item, data_format='text')
                        content.append({'text': downloaded_text['data']})
                    else:
                        downloaded_item = await download_item(item, data_format='bytes')
Collaborator: We should keep the original behavior of the isinstance(item, FileUrl) stuff below if is_text_like_media_type is False, but now we always download instead. I think we can add `and DocumentUrl.is_text_like_media_type(item.media_type)` to `elif isinstance(item, DocumentUrl)`, so that this branch is only used in that combination, and non-text DocumentUrls keep the old behavior.
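
A sketch of the narrowed branch the reviewer describes, as a fragment of the elif chain in _map_user_prompt (assuming an is_text_like_media_type helper reachable from DocumentUrl, which this PR does not add):

# Only text-like documents take the download-and-inline-as-text path;
# non-text DocumentUrls fall through to the existing FileUrl handling below.
elif isinstance(item, DocumentUrl) and DocumentUrl.is_text_like_media_type(item.media_type):
    downloaded_text = await download_item(item, data_format='text')
    content.append({'text': downloaded_text['data']})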

                        inline_data_dict: BlobDict = {
                            'data': downloaded_item['data'],
                            'mime_type': downloaded_item['data_type'],
                        }
                        content.append({'inline_data': inline_data_dict})

                elif isinstance(item, VideoUrl) and item.is_youtube:
                    file_data_dict: FileDataDict = {'file_uri': item.url, 'mime_type': item.media_type}
                    part_dict: PartDict = {'file_data': file_data_dict}
                    if item.vendor_metadata:  # pragma: no branch
                        part_dict['video_metadata'] = cast(VideoMetadataDict, item.vendor_metadata)
                    content.append(part_dict)

                elif isinstance(item, FileUrl):
                    if item.force_download or (
                        # google-gla does not support passing file urls directly, except for youtube videos
@@ -596,7 +621,29 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]:
                        content.append({'file_data': file_data_dict})  # pragma: lax no cover
                else:
                    assert_never(item)
        return content

        return content

    @staticmethod
    def _is_text_like_media_type(media_type: str) -> bool:
        return (
            media_type.startswith('text/')
            or media_type == 'application/json'
            or media_type.endswith('+json')
            or media_type == 'application/xml'
            or media_type.endswith('+xml')
            or media_type in ('application/x-yaml', 'application/yaml')
        )

    @staticmethod
    def _inline_text_file_part(text: str, *, media_type: str, identifier: str) -> ChatCompletionContentPartTextParam:
Collaborator: We should use this method

Contributor Author: In my latest commit, I have removed this method because the google model doesn't need it.

        text = '\n'.join(
            [
                f'-----BEGIN FILE id="{identifier}" type="{media_type}"-----',
                text,
                f'-----END FILE id="{identifier}"-----',
            ]
        )
        return {'text': text}
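
For reference, the fencing this helper produces looks like the following (file id and contents made up for illustration):

-----BEGIN FILE id="notes.txt" type="text/plain"-----
first line of the document
second line of the document
-----END FILE id="notes.txt"-----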

    def _map_response_schema(self, o: OutputObjectDefinition) -> dict[str, Any]:
        response_schema = o.json_schema.copy()