|
| 1 | +import random |
1 | 2 | from dataclasses import dataclass |
2 | 3 | from pathlib import Path |
3 | 4 |
|
| 5 | +import logfire |
4 | 6 | from pydantic import TypeAdapter |
5 | 7 | from pydantic_ai import Agent, BinaryContent |
6 | 8 |
|
# Send traces to Logfire under a dedicated service name, and auto-instrument
# Pydantic AI so every agent run below is recorded as a span with its
# model request/response details.
logfire.configure(service_name='pai-audio-evals')
logfire.instrument_pydantic_ai()

# Paths are resolved relative to this script so it works from any CWD.
this_dir = Path(__file__).parent
assets = this_dir / 'assets'  # directory holding the audio files listed in assets.json

|
@@ -39,12 +44,13 @@ def binary_content(self) -> BinaryContent: |
39 | 44 |
|
40 | 45 | files_schema = TypeAdapter(list[AudioFile]) |
41 | 46 | files = files_schema.validate_json((this_dir / 'assets.json').read_bytes()) |
42 | | - |
43 | | -for audio_file in files[:10]: |
44 | | - model_distances: list[tuple[str, int]] = [] |
45 | | - for model in 'gpt-4o-audio-preview', 'gpt-4o-mini-audio-preview', 'google-vertex:gemini-2.0-flash': |
46 | | - agent = Agent(model='gpt-4o-audio-preview', instructions='return the transcription only, no prefix or quotes') |
47 | | - result = agent.run_sync(['transcribe', audio_file.binary_content()]) |
48 | | - model_distances.append((model, levenshtein_distance(audio_file.text, result.output))) |
49 | | - print(audio_file.text) |
50 | | - print(' ', model_distances) |
| 47 | +random.shuffle(files) |
| 48 | +audio_agent = Agent(instructions='return the transcription only, no prefix or quotes') |
| 49 | + |
| 50 | +for audio_file in files[:3]: |
| 51 | + with logfire.span('Transcribing audio {audio_file.text!r}', audio_file=audio_file): |
| 52 | + model_distances: list[tuple[str, int]] = [] |
| 53 | + for model in 'gpt-4o-audio-preview', 'gpt-4o-mini-audio-preview', 'google-gla:gemini-2.0-flash': |
| 54 | + result = audio_agent.run_sync(['transcribe', audio_file.binary_content()], model=model) |
| 55 | + model_distances.append((model, levenshtein_distance(audio_file.text, result.output))) |
| 56 | + logfire.info(f'{model_distances}') |
0 commit comments