Skip to content

Commit d521c63

Browse files
authored
Merge pull request #59 from yml-org/feature/CM-1314/audio-transcriptions
feature: create audio transcriptions entrypoint
2 parents 79faba6 + 4063d43 commit d521c63

File tree

28 files changed

+594
-46
lines changed

28 files changed

+594
-46
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ try {
9191
- [ChatCompletions](guides/Features.md#chatcompletions)
9292
- [ImageGenerations](guides/Features.md#imagegenerations)
9393
- [Edits](guides/Features.md#edits)
94+
- [AudioTranscriptions](guides/Features.md#audiotranscriptions)
9495

9596
## ℹ️ Sample apps
9697

guides/Features.md

Lines changed: 81 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
- [ChatCompletions](#chatcompletions)
77
- [ImageGenerations](#imagegenerations)
88
- [Edits](#edits)
9+
- [AudioTranscriptions](#audiotranscriptions)
910

1011
## ListModels
1112

@@ -86,9 +87,12 @@ var yChat: YChat {
8687

8788
do {
8889
let result = try await yChat.completion()
90+
.setModel(input: "text-davinci-003")
8991
.setInput(input: "Say this is a test.")
9092
.setMaxTokens(tokens: 1024)
91-
.set... // you can set more parameters
93+
.setTemperature(temperature: 1.0)
94+
.setTopP(topP: 1.0)
95+
.saveHistory(isSaveHistory: false)
9296
.execute()
9397
} catch {
9498
// catch any error that may occur on api call.
@@ -104,9 +108,12 @@ val yChat by lazy {
104108

105109
try {
106110
val result = yChat.completion()
111+
.setModel("text-davinci-003")
107112
.setInput("Say this is a test.")
108113
.setMaxTokens(1024)
109-
.set... // you can set more parameters
114+
.setTemperature(1.0)
115+
.setTopP(1.0)
116+
.saveHistory(false)
110117
.execute()
111118
} catch (e: Exception) {
112119
// catch any error that may occur on api call.
@@ -126,12 +133,15 @@ var yChat: YChat {
126133

127134
do {
128135
let result = try await yChat.chatCompletions()
136+
.setModel(model: "gpt-3.5-turbo")
129137
.setMaxTokens(tokens: 1024)
138+
.setMaxResults(results: 1)
139+
.setTemperature(temperature: 1.0)
140+
.setTopP(topP: 1.0)
130141
.addMessage(
131142
role: "assistant",
132143
content: "You are a helpful assistant that only answers questions related to fitness"
133144
)
134-
.set... // you can set more parameters
135145
.execute(content: "What is the best exercise for building muscle?")
136146
} catch {
137147
// catch any error that may occur on api call.
@@ -147,12 +157,15 @@ val yChat by lazy {
147157

148158
try {
149159
val result = yChat.chatCompletions()
160+
.setModel("gpt-3.5-turbo")
150161
.setMaxTokens(1024)
162+
.setMaxResults(1)
163+
.setTemperature(1.0)
164+
.setTopP(1.0)
151165
.addMessage(
152166
role = "assistant",
153167
content = "You are a helpful assistant that only answers questions related to fitness"
154168
)
155-
.set... // you can set more parameters
156169
.execute("What is the best exercise for building muscle?")
157170
} catch (e: Exception) {
158171
// catch any error that may occur on api call.
@@ -172,9 +185,9 @@ var yChat: YChat {
172185

173186
do {
174187
let result = try await yChat.imageGenerations()
175-
.setResults(results: 2)
188+
.setResults(results: 1)
176189
.setSize(size: "1024x1024")
177-
.set... // you can set more parameters
190+
.setResponseFormat(responseFormat: "url")
178191
.execute(prompt: "ocean")
179192
} catch {
180193
// catch any error that may occur on api call.
@@ -190,9 +203,9 @@ val yChat by lazy {
190203

191204
try {
192205
val result = yChat.imageGenerations()
193-
.setResults(2)
206+
.setResults(1)
194207
.setSize("1024x1024")
195-
.set... // you can set more parameters
208+
.setResponseFormat("url")
196209
.execute("ocean")
197210
} catch (e: Exception) {
198211
// catch any error that may occur on api call.
@@ -214,7 +227,9 @@ do {
214227
let result = try await yChat.edits()
215228
.setInput(input: "What day of the wek is it?")
216229
.setResults(result: 1)
217-
.set... // you can set more parameters
230+
.setModel(model: "text-davinci-edit-001")
231+
.setTemperature(temperature: 1.0)
232+
.setTopP(topP: 1.0)
218233
.execute(instruction: "Fix the spelling mistakes")
219234
} catch {
220235
// catch any error that may occur on api call.
@@ -232,9 +247,65 @@ try {
232247
val result = yChat.edits()
233248
.setInput("What day of the wek is it?")
234249
.setResults(1)
235-
.set... // you can set more parameters
250+
.setModel("text-davinci-edit-001")
251+
.setTemperature(1.0)
252+
.setTopP(1.0)
236253
.execute("Fix the spelling mistakes")
237254
} catch (e: Exception) {
238255
// catch any error that may occur on api call.
239256
}
257+
```
258+
259+
## AudioTranscriptions
260+
261+
The audioTranscriptions API is used to transcribe audio into the input language.
262+
263+
### Swift
264+
265+
```swift
266+
var yChat: YChat {
267+
YChatCompanion.shared.create(apiKey: "your-api-key")
268+
}
269+
270+
guard let audioFileUrl = Bundle.main.url(forResource: "audio", withExtension: "m4a") else {
271+
print("Unable to find the audio file.")
272+
return
273+
}
274+
275+
let audioData = try! Data(contentsOf: audioFileUrl)
276+
277+
do {
278+
let result = try await yChat.audioTranscriptions()
279+
.setModel(model: "whisper-1")
280+
.setPrompt(prompt: "")
281+
.setResponseFormat(format: "json")
282+
.setTemperature(temperature: 0.4)
283+
.setLanguage(language: "en")
284+
.execute(filename: "audio.m4a", audioFile: audioData)
285+
} catch {
286+
// catch any error that may occur on api call.
287+
}
288+
```
289+
290+
### Kotlin
291+
292+
```kotlin
293+
val yChat by lazy {
294+
YChat.create("your-api-key")
295+
}
296+
297+
val inputStream = application.resources.openRawResource(R.raw.audio)
298+
val byteArray = inputStream.readBytes()
299+
300+
try {
301+
val result = yChat.audioTranscriptions()
302+
.setModel("whisper-1")
303+
.setPrompt("")
304+
.setResponseFormat("json")
305+
.setTemperature(0.4)
306+
.setLanguage("en")
307+
.execute("audio.m4a", byteArray)
308+
} catch (e: Exception) {
309+
// catch any error that may occur on api call.
310+
}
240311
```

sample/jvm/README.md

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,4 +91,19 @@ This endpoint retrieve the artificial intelligence model based on the given ID.
9191

9292
##### Example:
9393

94-
`GET http://localhost:8080/api/ychat/models/babbage`
94+
`GET http://localhost:8080/api/ychat/models/babbage`
95+
96+
### Audio Transcriptions Endpoint
97+
98+
This endpoint transcribes audio into the input language.
99+
100+
##### Endpoint: http://localhost:[port_number]/api/ychat/audio/transcriptions
101+
102+
##### Example:
103+
104+
```
105+
curl -X POST \
106+
-H "Content-Type: multipart/form-data" \
107+
-F "file=@/path/to/audio/file" \
108+
"http://localhost:8080/api/ychat/audio/transcriptions"
109+
```

sample/jvm/src/main/java/co/yml/ychat/jvm/controller/YChatController.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@
77
import org.springframework.http.ResponseEntity;
88
import org.springframework.web.bind.annotation.GetMapping;
99
import org.springframework.web.bind.annotation.PathVariable;
10+
import org.springframework.web.bind.annotation.PostMapping;
1011
import org.springframework.web.bind.annotation.RequestMapping;
1112
import org.springframework.web.bind.annotation.RequestParam;
1213
import org.springframework.web.bind.annotation.RestController;
14+
import org.springframework.web.multipart.MultipartFile;
1315

1416
@RestController
1517
@RequestMapping("api/ychat")
@@ -64,6 +66,14 @@ public ResponseEntity<AIModel> model(@PathVariable String id) throws Exception {
6466
return ResponseEntity.ok(result);
6567
}
6668

69+
@PostMapping("audio/transcriptions")
70+
public ResponseEntity<String> audioTranscriptions(
71+
@RequestParam("file") MultipartFile multipartFile
72+
) throws Exception {
73+
String result = YChatService.getAudioTranscription(multipartFile);
74+
return ResponseEntity.ok(result);
75+
}
76+
6777
private static class Defaults {
6878
static final String COMPLETION_INPUT = "Say this is a test.";
6979
static final String CHAT_COMPLETION_INPUT = "Tell me one strength exercise";

sample/jvm/src/main/java/co/yml/ychat/jvm/services/YChatService.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@
44
import co.yml.ychat.domain.model.AIModel;
55
import co.yml.ychat.domain.model.ChatMessage;
66
import java.util.List;
7+
import java.util.Optional;
78
import java.util.concurrent.CompletableFuture;
89
import org.jetbrains.annotations.NotNull;
910
import org.springframework.beans.factory.annotation.Autowired;
1011
import org.springframework.stereotype.Service;
12+
import org.springframework.web.multipart.MultipartFile;
1113

1214
@Service
1315
public class YChatService {
@@ -65,6 +67,15 @@ public AIModel getModel(String id) throws Exception {
6567
return future.get();
6668
}
6769

70+
public String getAudioTranscription(MultipartFile multipartFile) throws Exception {
71+
final CompletableFuture<String> future = new CompletableFuture<>();
72+
String filename = Optional.ofNullable(multipartFile.getOriginalFilename())
73+
.orElseThrow(() -> new IllegalStateException("Filename not found"));
74+
byte[] bytes = multipartFile.getBytes();
75+
ychat.audioTranscriptions().execute(filename, bytes, new CompletionCallbackResult<>(future));
76+
return future.get();
77+
}
78+
6879
private static class CompletionCallbackResult<T> implements YChat.Callback<T> {
6980

7081
private final CompletableFuture<T> future;
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
package co.yml.ychat.domain.model
2+
3+
actual typealias FileBytes = ByteArray
4+
5+
actual fun FileBytes.toByteArray(): ByteArray {
6+
return this
7+
}

ychat/src/commonMain/kotlin/co/yml/ychat/YChat.kt

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package co.yml.ychat
22

3+
import co.yml.ychat.entrypoint.features.AudioTranscriptions
34
import co.yml.ychat.entrypoint.features.ChatCompletions
45
import co.yml.ychat.entrypoint.features.Completion
56
import co.yml.ychat.entrypoint.features.Edits
@@ -111,7 +112,7 @@ interface YChat {
111112
* The image generations api is used to generate images based on a prompt. You input some text as a
112113
* prompt, and the model will generate one or more images.
113114
*
114-
* You can configure the parameters of the completion before executing it. Example:
115+
* You can configure the parameters before executing it. Example:
115116
* ```
116117
* val result = YChat.create(apiKey).imageGenerations()
117118
* .setResults(2)
@@ -137,6 +138,20 @@ interface YChat {
137138
*/
138139
fun edits(): Edits
139140

141+
/**
142+
* The audioTranscriptions api is used to transcribe audio into the input language.
143+
*
144+
* You can configure the parameters before executing it. Example:
145+
* ```
146+
* val result = YChat.create(apiKey).audioTranscriptions()
147+
* .setTemperature(0.4)
148+
* .setResponseFormat("json")
149+
* .set...
150+
* .execute("file.mp4", byteArrayFile)
151+
* ```
152+
*/
153+
fun audioTranscriptions(): AudioTranscriptions
154+
140155
/**
141156
* Callback is an interface used for handling the results of an operation.
142157
* It provides two methods, `onSuccess` and `onError`, for handling the success

ychat/src/commonMain/kotlin/co/yml/ychat/data/api/ChatGptApi.kt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package co.yml.ychat.data.api
22

3+
import co.yml.ychat.data.dto.AudioParamsDto
4+
import co.yml.ychat.data.dto.AudioResultDto
35
import co.yml.ychat.data.dto.ChatCompletionParamsDto
46
import co.yml.ychat.data.dto.ChatCompletionsDto
57
import co.yml.ychat.data.dto.CompletionDto
@@ -25,4 +27,6 @@ internal interface ChatGptApi {
2527
suspend fun models(): ApiResult<ModelListDto>
2628

2729
suspend fun model(id: String): ApiResult<ModelDto>
30+
31+
suspend fun audioTranscriptions(audioParamsDto: AudioParamsDto): ApiResult<AudioResultDto>
2832
}

0 commit comments

Comments
 (0)