Skip to content

Commit 6fd45a8

Browse files
olivier-drieux and Olivier Drieux authored
fix(OpenAI): Handle diarized json response in TranscriptionResponseSegment (#706)
Co-authored-by: Olivier Drieux <olivier.drieux@linkweb.fr>
1 parent 6b933f0 commit 6fd45a8

File tree

5 files changed: +122 −27 lines changed

src/Responses/Audio/TranscriptionResponse.php

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,12 +12,12 @@
1212
use OpenAI\Testing\Responses\Concerns\Fakeable;
1313

1414
/**
15-
* @implements ResponseContract<array{task: ?string, language: ?string, duration: ?float, segments: array<int, array{id: int, seek: int, start: float, end: float, text: string, tokens: array<int, int>, temperature: float, avg_logprob: float, compression_ratio: float, no_speech_prob: float, transient?: bool}>, words: array<int, array{word: string, start: float, end: float}>, text: string}>
15+
* @implements ResponseContract<array{task: ?string, language: ?string, duration: ?float, segments: array<int, array{id: int|string, start: float, end: float, text: string, seek?: int, tokens?: array<int, int>, temperature?: float, avg_logprob?: float, compression_ratio?: float, no_speech_prob?: float, transient?: bool, speaker?: string, type?: string}>, words: array<int, array{word: string, start: float, end: float}>, text: string}>
1616
*/
1717
final class TranscriptionResponse implements ResponseContract, ResponseHasMetaInformationContract
1818
{
1919
/**
20-
* @use ArrayAccessible<array{task: ?string, language: ?string, duration: ?float, segments: array<int, array{id: int, seek: int, start: float, end: float, text: string, tokens: array<int, int>, temperature: float, avg_logprob: float, compression_ratio: float, no_speech_prob: float, transient?: bool}>, words: array<int, array{word: string, start: float, end: float}>, text: string}>
20+
* @use ArrayAccessible<array{task: ?string, language: ?string, duration: ?float, segments: array<int, array{id: int|string, start: float, end: float, text: string, seek?: int, tokens?: array<int, int>, temperature?: float, avg_logprob?: float, compression_ratio?: float, no_speech_prob?: float, transient?: bool, speaker?: string, type?: string}>, words: array<int, array{word: string, start: float, end: float}>, text: string}>
2121
*/
2222
use ArrayAccessible;
2323

@@ -41,7 +41,7 @@ private function __construct(
4141
/**
4242
* Acts as static factory, and returns a new Response instance.
4343
*
44-
* @param array{task: ?string, language: ?string, duration: ?float, segments: array<int, array{id: int, seek: int, start: float, end: float, text: string, tokens: array<int, int>, temperature: float, avg_logprob: float, compression_ratio: float, no_speech_prob: float, transient?: bool}>, words: array<int, array{word: string, start: float, end: float}>, text: string}|string $attributes
44+
* @param array{task: ?string, language: ?string, duration: ?float, segments: array<int, array{id: int|string, start: float, end: float, text: string, seek?: int, tokens?: array<int, int>, temperature?: float, avg_logprob?: float, compression_ratio?: float, no_speech_prob?: float, transient?: bool, speaker?: string, type?: string}>, words: array<int, array{word: string, start: float, end: float}>, text: string}|string $attributes
4545
*/
4646
public static function from(array|string $attributes, MetaInformation $meta): self
4747
{

src/Responses/Audio/TranscriptionResponseSegment.php

Lines changed: 53 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -8,51 +8,56 @@
88
use OpenAI\Responses\Concerns\ArrayAccessible;
99

1010
/**
11-
* @implements ResponseContract<array{id: int, seek: int, start: float, end: float, text: string, tokens: array<int, int>, temperature: float, avg_logprob: float, compression_ratio: float, no_speech_prob: float, transient?: bool}>
11+
* @implements ResponseContract<array{id: int|string, start: float, end: float, text: string, seek?: int, tokens?: array<int, int>, temperature?: float, avg_logprob?: float, compression_ratio?: float, no_speech_prob?: float, transient?: bool, speaker?: string, type?: string}>
1212
*/
1313
final class TranscriptionResponseSegment implements ResponseContract
1414
{
1515
/**
16-
* @use ArrayAccessible<array{id: int, seek: int, start: float, end: float, text: string, tokens: array<int, int>, temperature: float, avg_logprob: float, compression_ratio: float, no_speech_prob: float, transient?: bool}>
16+
* @use ArrayAccessible<array{id: int|string, start: float, end: float, text: string, seek?: int, tokens?: array<int, int>, temperature?: float, avg_logprob?: float, compression_ratio?: float, no_speech_prob?: float, transient?: bool, speaker?: string, type?: string}>
1717
*/
1818
use ArrayAccessible;
1919

2020
/**
21+
* @param int|string $id string in case of diarization, int otherwise
2122
* @param array<int, int> $tokens
2223
*/
2324
private function __construct(
24-
public readonly int $id,
25-
public readonly int $seek,
25+
public readonly int|string $id,
2626
public readonly float $start,
2727
public readonly float $end,
2828
public readonly string $text,
29-
public readonly array $tokens,
30-
public readonly float $temperature,
31-
public readonly float $avgLogprob,
32-
public readonly float $compressionRatio,
33-
public readonly float $noSpeechProb,
29+
public readonly ?int $seek,
30+
public readonly ?array $tokens,
31+
public readonly ?float $temperature,
32+
public readonly ?float $avgLogprob,
33+
public readonly ?float $compressionRatio,
34+
public readonly ?float $noSpeechProb,
3435
public readonly ?bool $transient,
36+
public readonly ?string $speaker,
37+
public readonly ?string $type,
3538
) {}
3639

3740
/**
3841
* Acts as static factory, and returns a new Response instance.
3942
*
40-
* @param array{id: int, seek: int, start: float, end: float, text: string, tokens: array<int, int>, temperature: float, avg_logprob: float, compression_ratio: float, no_speech_prob: float, transient?: bool} $attributes
43+
* @param array{id: int|string, start: float, end: float, text: string, seek?: int, tokens?: array<int, int>, temperature?: float, avg_logprob?: float, compression_ratio?: float, no_speech_prob?: float, transient?: bool, speaker?: string, type?: string} $attributes
4144
*/
4245
public static function from(array $attributes): self
4346
{
4447
return new self(
4548
$attributes['id'],
46-
$attributes['seek'],
4749
$attributes['start'],
4850
$attributes['end'],
4951
$attributes['text'],
50-
$attributes['tokens'],
51-
$attributes['temperature'],
52-
$attributes['avg_logprob'],
53-
$attributes['compression_ratio'],
54-
$attributes['no_speech_prob'],
52+
$attributes['seek'] ?? null,
53+
$attributes['tokens'] ?? null,
54+
$attributes['temperature'] ?? null,
55+
$attributes['avg_logprob'] ?? null,
56+
$attributes['compression_ratio'] ?? null,
57+
$attributes['no_speech_prob'] ?? null,
5558
$attributes['transient'] ?? null,
59+
$attributes['speaker'] ?? null,
60+
$attributes['type'] ?? null,
5661
);
5762
}
5863

@@ -63,21 +68,47 @@ public function toArray(): array
6368
{
6469
$data = [
6570
'id' => $this->id,
66-
'seek' => $this->seek,
6771
'start' => $this->start,
6872
'end' => $this->end,
6973
'text' => $this->text,
70-
'tokens' => $this->tokens,
71-
'temperature' => $this->temperature,
72-
'avg_logprob' => $this->avgLogprob,
73-
'compression_ratio' => $this->compressionRatio,
74-
'no_speech_prob' => $this->noSpeechProb,
7574
];
7675

76+
if ($this->seek !== null) {
77+
$data['seek'] = $this->seek;
78+
}
79+
80+
if ($this->tokens !== null) {
81+
$data['tokens'] = $this->tokens;
82+
}
83+
84+
if ($this->temperature !== null) {
85+
$data['temperature'] = $this->temperature;
86+
}
87+
88+
if ($this->avgLogprob !== null) {
89+
$data['avg_logprob'] = $this->avgLogprob;
90+
}
91+
92+
if ($this->compressionRatio !== null) {
93+
$data['compression_ratio'] = $this->compressionRatio;
94+
}
95+
96+
if ($this->noSpeechProb !== null) {
97+
$data['no_speech_prob'] = $this->noSpeechProb;
98+
}
99+
77100
if ($this->transient !== null) {
78101
$data['transient'] = $this->transient;
79102
}
80103

104+
if ($this->speaker !== null) {
105+
$data['speaker'] = $this->speaker;
106+
}
107+
108+
if ($this->type !== null) {
109+
$data['type'] = $this->type;
110+
}
111+
81112
return $data;
82113
}
83114
}

tests/Fixtures/Audio.php

Lines changed: 24 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,10 +12,10 @@ function audioTranscriptionVerboseJson(): array
1212
'segments' => [
1313
[
1414
'id' => 0,
15-
'seek' => 0,
1615
'start' => 0.0,
1716
'end' => 4.0,
1817
'text' => ' Hello, how are you?',
18+
'seek' => 0,
1919
'tokens' => [
2020
50364,
2121
2425,
@@ -59,6 +59,29 @@ function audioTranscriptionVerboseJson(): array
5959
];
6060
}
6161

62+
/**
63+
* @return array<string, mixed>
64+
*/
65+
function audioTranscriptionDiarizedJson(): array
66+
{
67+
return [
68+
'task' => 'transcribe',
69+
'language' => 'english',
70+
'duration' => 2.95,
71+
'segments' => [
72+
[
73+
'id' => 'seg_0',
74+
'start' => 0.0,
75+
'end' => 4.0,
76+
'text' => ' Hello, how are you?',
77+
'speaker' => 'A',
78+
'type' => 'transcript.text.segment',
79+
],
80+
],
81+
'text' => 'Hello, how are you?',
82+
];
83+
}
84+
6285
/**
6386
* @return array<string, string>
6487
*/

tests/Responses/Audio/TranscriptionResponse.php

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,23 @@
3737
->meta()->toBeInstanceOf(MetaInformation::class);
3838
});
3939

40+
test('from diarized json', function () {
41+
$transcription = TranscriptionResponse::from(audioTranscriptionDiarizedJson(), meta());
42+
43+
expect($transcription)
44+
->toBeInstanceOf(TranscriptionResponse::class)
45+
->task->toBe('transcribe')
46+
->language->toBe('english')
47+
->duration->toBe(2.95)
48+
->segments->toBeArray()
49+
->segments->toHaveCount(1)
50+
->segments->each->toBeInstanceOf(TranscriptionResponseSegment::class)
51+
->words->toBeArray()
52+
->words->toHaveCount(0)
53+
->text->toBe('Hello, how are you?')
54+
->meta()->toBeInstanceOf(MetaInformation::class);
55+
});
56+
4057
test('from text', function () {
4158
$transcription = TranscriptionResponse::from(audioTranscriptionText(), meta());
4259

tests/Responses/Audio/TranscriptionResponseSegment.php

Lines changed: 25 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,31 @@
1919
->avgLogprob->toBe(-0.45045216878255206)
2020
->compressionRatio->toBe(0.7037037037037037)
2121
->noSpeechProb->toBe(0.1076972484588623)
22-
->transient->toBeFalse();
22+
->transient->toBeFalse()
23+
// Test that diarization-specific properties are null
24+
->type->toBeNull()
25+
->speaker->toBeNull();
26+
});
27+
28+
test('from diarized', function () {
29+
$result = TranscriptionResponseSegment::from(audioTranscriptionDiarizedJson()['segments'][0]);
30+
31+
expect($result)
32+
->toBeInstanceOf(TranscriptionResponseSegment::class)
33+
->id->toBe('seg_0')
34+
->start->toBe(0.0)
35+
->end->toBe(4.0)
36+
->text->toBe(' Hello, how are you?')
37+
->speaker->toBe('A')
38+
->type->toBe('transcript.text.segment')
39+
// Test that non-diarization-specific properties are null
40+
->tokens->toBeNull()
41+
->seek->toBeNull()
42+
->temperature->toBeNull()
43+
->avgLogprob->toBeNull()
44+
->compressionRatio->toBeNull()
45+
->noSpeechProb->toBeNull()
46+
->transient->toBeNull();
2347
});
2448

2549
test('to array', function () {

0 commit comments

Comments
 (0)