def is_text_like_media_type(media_type: str) -> bool:
    """Return ``True`` if ``media_type`` denotes textual content that can be inlined as plain text.

    Recognized as text-like:
    - any ``text/*`` type,
    - ``application/json`` and any ``+json`` structured-syntax suffix (RFC 6839),
    - ``application/xml`` and any ``+xml`` suffix,
    - the YAML types ``application/yaml`` / ``application/x-yaml``.

    NOTE(review): the check assumes a bare media type — a value carrying
    parameters (e.g. ``application/json; charset=utf-8``) will not match;
    confirm callers never pass parameters before relying on that.

    Exposed as a staticmethod on both ``DocumentUrl`` and ``BinaryContent``
    (the latter delegates here); the parameter is named ``media_type`` in
    both places for consistency.
    """
    return (
        media_type.startswith('text/')
        or media_type == 'application/json'
        or media_type.endswith('+json')
        or media_type == 'application/xml'
        or media_type.endswith('+xml')
        or media_type in ('application/x-yaml', 'application/yaml')
    )


def inline_text_file_part(text: str, *, media_type: str, identifier: str) -> dict[str, str]:
    """Wrap ``text`` in BEGIN/END FILE delimiters and return it as a ``{'text': ...}`` part.

    The delimiters carry the file's ``identifier`` and ``media_type`` so the
    model can attribute the content to a specific attached file:

        -----BEGIN FILE id="<identifier>" type="<media_type>"-----
        <text>
        -----END FILE id="<identifier>"-----

    Args:
        text: Decoded textual content of the file.
        media_type: The file's media type, echoed into the BEGIN delimiter.
        identifier: Stable identifier echoed into both delimiters.

    Returns:
        A ``{'text': wrapped}`` mapping suitable for use as a provider text part.
    """
    wrapped = '\n'.join(
        [
            f'-----BEGIN FILE id="{identifier}" type="{media_type}"-----',
            text,
            f'-----END FILE id="{identifier}"-----',
        ]
    )
    return {'text': wrapped}
-565,17 +566,46 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]: if isinstance(item, str): content.append({'text': item}) elif isinstance(item, BinaryContent): - inline_data_dict: BlobDict = {'data': item.data, 'mime_type': item.media_type} - part_dict: PartDict = {'inline_data': inline_data_dict} - if item.vendor_metadata: - part_dict['video_metadata'] = cast(VideoMetadataDict, item.vendor_metadata) - content.append(part_dict) + if BinaryContent.is_text_like_media_type(item.media_type): + content.append( + BinaryContent.inline_text_file_part( + item.data.decode('utf-8'), + media_type=item.media_type, + identifier=item.identifier, + ) + ) + else: + inline_data_dict: BlobDict = {'data': item.data, 'mime_type': item.media_type} + part_dict: PartDict = {'inline_data': inline_data_dict} + if item.vendor_metadata: + part_dict['video_metadata'] = cast(VideoMetadataDict, item.vendor_metadata) + content.append(part_dict) + + elif isinstance(item, DocumentUrl): + if DocumentUrl.is_text_like_media_type(item.media_type): + downloaded_text = await download_item(item, data_format='text') + content.append( + DocumentUrl.inline_text_file_part( + downloaded_text['data'], + media_type=item.media_type, + identifier=item.identifier, + ) + ) + else: + downloaded_item = await download_item(item, data_format='bytes') + inline_data_dict: BlobDict = { + 'data': downloaded_item['data'], + 'mime_type': downloaded_item['data_type'], + } + content.append({'inline_data': inline_data_dict}) + elif isinstance(item, VideoUrl) and item.is_youtube: file_data_dict: FileDataDict = {'file_uri': item.url, 'mime_type': item.media_type} part_dict: PartDict = {'file_data': file_data_dict} if item.vendor_metadata: # pragma: no branch part_dict['video_metadata'] = cast(VideoMetadataDict, item.vendor_metadata) content.append(part_dict) + elif isinstance(item, FileUrl): if item.force_download or ( # google-gla does not support passing file urls directly, except for youtube videos 
@@ -594,7 +624,8 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]: content.append({'file_data': file_data_dict}) # pragma: lax no cover else: assert_never(item) - return content + + return content def _map_response_schema(self, o: OutputObjectDefinition) -> dict[str, Any]: response_schema = o.json_schema.copy() diff --git a/tests/models/cassettes/test_google/test_google_model_json_document_url_input.yaml b/tests/models/cassettes/test_google/test_google_model_json_document_url_input.yaml new file mode 100644 index 0000000000..b55747d627 --- /dev/null +++ b/tests/models/cassettes/test_google/test_google_model_json_document_url_input.yaml @@ -0,0 +1,176 @@ +interactions: +- request: + body: '' + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate + connection: + - keep-alive + host: + - kamalscraping-collab.github.io + method: GET + uri: https://kamalscraping-collab.github.io/sample-data/sample_transcript.json + response: + body: + string: |- + { + "items": [ + { + "id": "GR_ad8d2a461fc5", + "type": "message", + "role": "assistant", + "content": [ + "Hello, how can I help you today?" + ], + "interrupted": false + }, + { + "id": "item_13ecd51e0dcc", + "type": "function_call", + "call_id": "function-call-18124021183837676163", + "arguments": "{\"location\": \"Kampala, Uganda\"}", + "name": "lookup_weather" + }, + { + "id": "GI_14a70e7c2d20", + "type": "message", + "role": "user", + "content": [ + "Haide, can you please tell me the weather in compiler Uganda" + ], + "interrupted": false + }, + { + "id": "item_000f739d4414", + "type": "function_call_output", + "name": "lookup_weather", + "call_id": "function-call-18124021183837676163", + "output": "{'weather': 'sunny', 'temperature_f': 70}", + "is_error": false + }, + { + "id": "GR_95c91db6b975", + "type": "message", + "role": "assistant", + "content": [ + "The weather in Kampala, Uganda is sunny with a temperature of 70 degrees Fahrenheit." 
+ ], + "interrupted": false + }, + { + "id": "GI_c8cc9177073f", + "type": "message", + "role": "user", + "content": [ + "what can you please tell me what are the best things to do in compiler you're" + ], + "interrupted": false + }, + { + "id": "GR_792c5f6fbc89", + "type": "message", + "role": "assistant", + "content": [ + "While I can tell you the weather, I'm not able to provide information on the best things to do in a specific location. Is there anything else I can help you with?" + ], + "interrupted": false + } + ] + } + headers: + cache-control: + - max-age=604800 + - public + connection: + - keep-alive + content-length: + - '2574' + content-type: + - text/plain; charset=UTF-8 + etag: + - W/"61efea10-a0e" + expires: + - Fri, 26 Dec 2025 16:42:28 GMT + last-modified: + - Tue, 25 Jan 2022 12:16:16 GMT + strict-transport-security: + - max-age=15552000; includeSubDomains + transfer-encoding: + - chunked + vary: + - Accept-Encoding + status: + code: 200 + message: OK +- request: + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '3701' + content-type: + - application/json + host: + - generativelanguage.googleapis.com + method: POST + parsed_body: + contents: + - parts: + - text: What is the main content on this document? 
+ - inlineData: + data: VFhUIHRlc3QgZmlsZQpQdXJwb3NlOiBQcm92aWRlIGV4YW1wbGUgb2YgdGhpcyBmaWxlIHR5cGUKRG9jdW1lbnQgZmlsZSB0eXBlOiBUWFQKVmVyc2lvbjogMS4wClJlbWFyazoKCkV4YW1wbGUgY29udGVudDoKVGhlIG5hbWVzICJKb2huIERvZSIgZm9yIG1hbGVzLCAiSmFuZSBEb2UiIG9yICJKYW5lIFJvZSIgZm9yIGZlbWFsZXMsIG9yICJKb25uaWUgRG9lIiBhbmQgIkphbmllIERvZSIgZm9yIGNoaWxkcmVuLCBvciBqdXN0ICJEb2UiIG5vbi1nZW5kZXItc3BlY2lmaWNhbGx5IGFyZSB1c2VkIGFzIHBsYWNlaG9sZGVyIG5hbWVzIGZvciBhIHBhcnR5IHdob3NlIHRydWUgaWRlbnRpdHkgaXMgdW5rbm93biBvciBtdXN0IGJlIHdpdGhoZWxkIGluIGEgbGVnYWwgYWN0aW9uLCBjYXNlLCBvciBkaXNjdXNzaW9uLiBUaGUgbmFtZXMgYXJlIGFsc28gdXNlZCB0byByZWZlciB0byBhY29ycHNlIG9yIGhvc3BpdGFsIHBhdGllbnQgd2hvc2UgaWRlbnRpdHkgaXMgdW5rbm93bi4gVGhpcyBwcmFjdGljZSBpcyB3aWRlbHkgdXNlZCBpbiB0aGUgVW5pdGVkIFN0YXRlcyBhbmQgQ2FuYWRhLCBidXQgaXMgcmFyZWx5IHVzZWQgaW4gb3RoZXIgRW5nbGlzaC1zcGVha2luZyBjb3VudHJpZXMgaW5jbHVkaW5nIHRoZSBVbml0ZWQgS2luZ2RvbSBpdHNlbGYsIGZyb20gd2hlcmUgdGhlIHVzZSBvZiAiSm9obiBEb2UiIGluIGEgbGVnYWwgY29udGV4dCBvcmlnaW5hdGVzLiBUaGUgbmFtZXMgSm9lIEJsb2dncyBvciBKb2huIFNtaXRoIGFyZSB1c2VkIGluIHRoZSBVSyBpbnN0ZWFkLCBhcyB3ZWxsIGFzIGluIEF1c3RyYWxpYSBhbmQgTmV3IFplYWxhbmQuCgpKb2huIERvZSBpcyBzb21ldGltZXMgdXNlZCB0byByZWZlciB0byBhIHR5cGljYWwgbWFsZSBpbiBvdGhlciBjb250ZXh0cyBhcyB3ZWxsLCBpbiBhIHNpbWlsYXIgbWFubmVyIHRvIEpvaG4gUS4gUHVibGljLCBrbm93biBpbiBHcmVhdCBCcml0YWluIGFzIEpvZSBQdWJsaWMsIEpvaG4gU21pdGggb3IgSm9lIEJsb2dncy4gRm9yIGV4YW1wbGUsIHRoZSBmaXJzdCBuYW1lIGxpc3RlZCBvbiBhIGZvcm0gaXMgb2Z0ZW4gSm9obiBEb2UsIGFsb25nIHdpdGggYSBmaWN0aW9uYWwgYWRkcmVzcyBvciBvdGhlciBmaWN0aW9uYWwgaW5mb3JtYXRpb24gdG8gcHJvdmlkZSBhbiBleGFtcGxlIG9mIGhvdyB0byBmaWxsIGluIHRoZSBmb3JtLiBUaGUgbmFtZSBpcyBhbHNvIHVzZWQgZnJlcXVlbnRseSBpbiBwb3B1bGFyIGN1bHR1cmUsIGZvciBleGFtcGxlIGluIHRoZSBGcmFuayBDYXByYSBmaWxtIE1lZXQgSm9obiBEb2UuIEpvaG4gRG9lIHdhcyBhbHNvIHRoZSBuYW1lIG9mIGEgMjAwMiBBbWVyaWNhbiB0ZWxldmlzaW9uIHNlcmllcy4KClNpbWlsYXJseSwgYSBjaGlsZCBvciBiYWJ5IHdob3NlIGlkZW50aXR5IGlzIHVua25vd24gbWF5IGJlIHJlZmVycmVkIHRvIGFzIEJhYnkgRG9lLiBBIG5vdG9yaW91cyBtdXJkZXIgY2FzZSBpbiBLYW5zYXMgQ2l0eSwgTWlzc291
cmksIHJlZmVycmVkIHRvIHRoZSBiYWJ5IHZpY3RpbSBhcyBQcmVjaW91cyBEb2UuIE90aGVyIHVuaWRlbnRpZmllZCBmZW1hbGUgbXVyZGVyIHZpY3RpbXMgYXJlIENhbGkgRG9lIGFuZCBQcmluY2VzcyBEb2UuIEFkZGl0aW9uYWwgcGVyc29ucyBtYXkgYmUgY2FsbGVkIEphbWVzIERvZSwgSnVkeSBEb2UsIGV0Yy4gSG93ZXZlciwgdG8gYXZvaWQgcG9zc2libGUgY29uZnVzaW9uLCBpZiB0d28gYW5vbnltb3VzIG9yIHVua25vd24gcGFydGllcyBhcmUgY2l0ZWQgaW4gYSBzcGVjaWZpYyBjYXNlIG9yIGFjdGlvbiwgdGhlIHN1cm5hbWVzIERvZSBhbmQgUm9lIG1heSBiZSB1c2VkIHNpbXVsdGFuZW91c2x5OyBmb3IgZXhhbXBsZSwgIkpvaG4gRG9lIHYuIEphbmUgUm9lIi4gSWYgc2V2ZXJhbCBhbm9ueW1vdXMgcGFydGllcyBhcmUgcmVmZXJlbmNlZCwgdGhleSBtYXkgc2ltcGx5IGJlIGxhYmVsbGVkIEpvaG4gRG9lICMxLCBKb2huIERvZSAjMiwgZXRjLiAodGhlIFUuUy4gT3BlcmF0aW9uIERlbGVnbyBjaXRlZCAyMSAobnVtYmVyZWQpICJKb2huIERvZSJzKSBvciBsYWJlbGxlZCB3aXRoIG90aGVyIHZhcmlhbnRzIG9mIERvZSAvIFJvZSAvIFBvZSAvIGV0Yy4gT3RoZXIgZWFybHkgYWx0ZXJuYXRpdmVzIHN1Y2ggYXMgSm9obiBTdGlsZXMgYW5kIFJpY2hhcmQgTWlsZXMgYXJlIG5vdyByYXJlbHkgdXNlZCwgYW5kIE1hcnkgTWFqb3IgaGFzIGJlZW4gdXNlZCBpbiBzb21lIEFtZXJpY2FuIGZlZGVyYWwgY2FzZXMuCgoKCkZpbGUgY3JlYXRlZCBieSBodHRwczovL3d3dy5vbmxpbmUtY29udmVydC5jb20KTW9yZSBleGFtcGxlIGZpbGVzOiBodHRwczovL3d3dy5vbmxpbmUtY29udmVydC5jb20vZmlsZS10eXBlClRleHQgb2YgRXhhbXBsZSBjb250ZW50OiBXaWtpcGVkaWEgKGh0dHBzOi8vZW4ud2lraXBlZGlhLm9yZy93aWtpL0pvaG5fRG9lKQpMaWNlbnNlOiBBdHRyaWJ1dGlvbi1TaGFyZUFsaWtlIDQuMCAoaHR0cHM6Ly9jcmVhdGl2ZWNvbW1vbnMub3JnL2xpY2Vuc2VzL2J5LXNhLzQuMC8pCgpGZWVsIGZyZWUgdG8gdXNlIGFuZCBzaGFyZSB0aGUgZmlsZSBhY2NvcmRpbmcgdG8gdGhlIGxpY2Vuc2UgYWJvdmUu + mimeType: application/json + role: user + generationConfig: {} + systemInstruction: + parts: + - text: You are a helpful chatbot. 
async def test_google_model_json_document_url_input(allow_model_requests: None, google_provider: GoogleProvider):
    """A JSON `DocumentUrl` is downloaded and inlined as a delimited text part for Gemini.

    Replays the `test_google_model_json_document_url_input.yaml` cassette. The
    prompt string must match the request body recorded in that cassette
    ('What is the main content on this document?') byte-for-byte, otherwise
    replay fails under body matching.
    """
    m = GoogleModel('gemini-2.5-pro', provider=google_provider)
    agent = Agent(m, system_prompt='You are a helpful chatbot.')

    json_document_url = DocumentUrl(url='https://kamalscraping-collab.github.io/sample-data/sample_transcript.json')

    # Wording intentionally mirrors the cassette's recorded request ('on', not 'of').
    result = await agent.run(['What is the main content on this document?', json_document_url])
    assert result.output == snapshot(
        'Based on the JSON data provided, the document contains the log of a conversation between a user and an AI assistant.\n'
    )
GoogleModel('gemini-2.0-flash', provider=google_provider) agent = Agent(m, system_prompt='You are a helpful chatbot.')