Skip to content

Commit 63883b0

Browse files
jeff-arn, apaparazzi0329
authored and committed
chore(hand edits): apply speech to text hand edits
1 parent 2608be0 commit 63883b0

File tree

3 files changed

+71
-15
lines changed

3 files changed

+71
-15
lines changed

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/SpeechToText.java

Lines changed: 63 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -72,6 +72,7 @@
7272
import com.ibm.watson.speech_to_text.v1.model.RecognitionJob;
7373
import com.ibm.watson.speech_to_text.v1.model.RecognitionJobs;
7474
import com.ibm.watson.speech_to_text.v1.model.RecognizeOptions;
75+
import com.ibm.watson.speech_to_text.v1.model.RecognizeWithWebsocketsOptions;
7576
import com.ibm.watson.speech_to_text.v1.model.RegisterCallbackOptions;
7677
import com.ibm.watson.speech_to_text.v1.model.RegisterStatus;
7778
import com.ibm.watson.speech_to_text.v1.model.ResetAcousticModelOptions;
@@ -87,10 +88,15 @@
8788
import com.ibm.watson.speech_to_text.v1.model.UpgradeLanguageModelOptions;
8889
import com.ibm.watson.speech_to_text.v1.model.Word;
8990
import com.ibm.watson.speech_to_text.v1.model.Words;
91+
import com.ibm.watson.speech_to_text.v1.websocket.RecognizeCallback;
92+
import com.ibm.watson.speech_to_text.v1.websocket.SpeechToTextWebSocketListener;
9093
import java.util.HashMap;
9194
import java.util.Map;
9295
import java.util.Map.Entry;
93-
import okhttp3.MultipartBody;
96+
import okhttp3.HttpUrl;
97+
import okhttp3.OkHttpClient;
98+
import okhttp3.Request;
99+
import okhttp3.WebSocket;
94100

95101
/**
96102
* The IBM Watson™ Speech to Text service provides APIs that use IBM's speech-recognition
@@ -180,6 +186,58 @@ public SpeechToText(String serviceName, Authenticator authenticator) {
180186
this.configureService(serviceName);
181187
}
182188

189+
/**
190+
* Sends audio and returns transcription results for recognition requests over a WebSocket
191+
* connection. Requests and responses are enabled over a single TCP connection that abstracts much
192+
* of the complexity of the request to offer efficient implementation, low latency, high
193+
* throughput, and an asynchronous response. By default, only final results are returned for any
194+
* request; to enable interim results, set the interimResults parameter to true.
195+
*
196+
* <p>The service imposes a data size limit of 100 MB per utterance (per recognition request). You
197+
* can send multiple utterances over a single WebSocket connection. The service automatically
198+
* detects the endianness of the incoming audio and, for audio that includes multiple channels,
199+
* downmixes the audio to one-channel mono during transcoding. (For the audio/l16 format, you can
200+
* specify the endianness.)
201+
*
202+
* @param recognizeOptions the recognize options
203+
* @param callback the {@link RecognizeCallback} instance where results will be sent
204+
* @return the {@link WebSocket}
205+
*/
206+
public WebSocket recognizeUsingWebSocket(
207+
RecognizeWithWebsocketsOptions recognizeOptions, RecognizeCallback callback) {
208+
// Fail fast: the options object, its audio payload, and the callback are all required.
com.ibm.cloud.sdk.core.util.Validator.notNull(
209+
recognizeOptions, "recognizeOptions cannot be null");
210+
com.ibm.cloud.sdk.core.util.Validator.notNull(recognizeOptions.audio(), "audio cannot be null");
211+
com.ibm.cloud.sdk.core.util.Validator.notNull(callback, "callback cannot be null");
212+
// Start from the configured service URL plus the recognize path.
// NOTE(review): HttpUrl.parse(...) is dereferenced without a null check; confirm that
// getServiceUrl() always yields a parseable http(s) URL here.
213+
HttpUrl.Builder urlBuilder = HttpUrl.parse(getServiceUrl() + "/v1/recognize").newBuilder();
214+
// Each optional setting below is added as a query parameter only when the caller set it.
215+
if (recognizeOptions.model() != null) {
216+
urlBuilder.addQueryParameter("model", recognizeOptions.model());
217+
}
218+
if (recognizeOptions.languageCustomizationId() != null) {
219+
urlBuilder.addQueryParameter(
220+
"language_customization_id", recognizeOptions.languageCustomizationId());
221+
}
222+
if (recognizeOptions.acousticCustomizationId() != null) {
223+
urlBuilder.addQueryParameter(
224+
"acoustic_customization_id", recognizeOptions.acousticCustomizationId());
225+
}
226+
if (recognizeOptions.baseModelVersion() != null) {
227+
urlBuilder.addQueryParameter("base_model_version", recognizeOptions.baseModelVersion());
228+
}
229+
// Rewrite the scheme so the request targets the secure WebSocket endpoint.
// NOTE(review): only the literal "https://" is replaced; a URL using "http://" passes through
// unchanged. Confirm whether plain-http service URLs need a matching "ws://" rewrite.
230+
String url = urlBuilder.toString().replace("https://", "wss://");
231+
Request.Builder builder = new Request.Builder().url(url);
232+
// Let the service helpers decorate the request before it is built; presumably these attach
// credentials and the SDK's default headers (helpers are defined outside this view).
233+
setAuthentication(builder);
234+
setDefaultHeaders(builder);
235+
// Open the WebSocket; the listener is constructed with both the options and the callback.
236+
OkHttpClient client = configureHttpClient();
237+
return client.newWebSocket(
238+
builder.build(), new SpeechToTextWebSocketListener(recognizeOptions, callback));
239+
}
240+
183241
/**
184242
* List models.
185243
*
@@ -1378,12 +1436,10 @@ public ServiceCall<Void> addCorpus(AddCorpusOptions addCorpusOptions) {
13781436
if (addCorpusOptions.allowOverwrite() != null) {
13791437
builder.query("allow_overwrite", String.valueOf(addCorpusOptions.allowOverwrite()));
13801438
}
1381-
MultipartBody.Builder multipartBuilder = new MultipartBody.Builder();
1382-
multipartBuilder.setType(MultipartBody.FORM);
1383-
okhttp3.RequestBody corpusFileBody =
1384-
RequestUtils.inputStreamBody(addCorpusOptions.corpusFile(), "text/plain");
1385-
multipartBuilder.addFormDataPart("corpus_file", "filename", corpusFileBody);
1386-
builder.body(multipartBuilder.build());
1439+
1440+
// hand edit replacement for corpus file serialization
1441+
builder.body(RequestUtils.inputStreamBody(addCorpusOptions.corpusFile(), "text/plain"));
1442+
13871443
ResponseConverter<Void> responseConverter = ResponseConverterUtils.getVoid();
13881444
return createServiceCall(builder.build(), responseConverter);
13891445
}

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/SpeechRecognitionAlternative.java

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -21,10 +21,10 @@ public class SpeechRecognitionAlternative extends GenericModel {
2121

2222
protected String transcript;
2323
protected Double confidence;
24-
protected List<String> timestamps;
24+
protected List<SpeechTimestamp> timestamps;
2525

2626
@SerializedName("word_confidence")
27-
protected List<String> wordConfidence;
27+
protected List<SpeechWordConfidence> wordConfidence;
2828

2929
protected SpeechRecognitionAlternative() {}
3030

@@ -62,7 +62,7 @@ public Double getConfidence() {
6262
*
6363
* @return the timestamps
6464
*/
65-
public List<String> getTimestamps() {
65+
public List<SpeechTimestamp> getTimestamps() {
6666
return timestamps;
6767
}
6868

@@ -76,7 +76,7 @@ public List<String> getTimestamps() {
7676
*
7777
* @return the wordConfidence
7878
*/
79-
public List<String> getWordConfidence() {
79+
public List<SpeechWordConfidence> getWordConfidence() {
8080
return wordConfidence;
8181
}
8282
}

speech-to-text/src/test/java/com/ibm/watson/speech_to_text/v1/SpeechToTextTest.java

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -212,7 +212,7 @@ public void testGetModelNoOptions() throws Throwable {
212212
public void testRecognizeWOptions() throws Throwable {
213213
// Register a mock response
214214
String mockResponseBody =
215-
"{\"results\": [{\"final\": true, \"alternatives\": [{\"transcript\": \"transcript\", \"confidence\": 0, \"timestamps\": [\"timestamps\"], \"word_confidence\": [\"wordConfidence\"]}], \"keywords_result\": {\"mapKey\": [{\"normalized_text\": \"normalizedText\", \"start_time\": 9, \"end_time\": 7, \"confidence\": 0}]}, \"word_alternatives\": [{\"start_time\": 9, \"end_time\": 7, \"alternatives\": [{\"confidence\": 0, \"word\": \"word\"}]}], \"end_of_utterance\": \"end_of_data\"}], \"result_index\": 11, \"speaker_labels\": [{\"from\": 4, \"to\": 2, \"speaker\": 7, \"confidence\": 10, \"final\": true}], \"processing_metrics\": {\"processed_audio\": {\"received\": 8, \"seen_by_engine\": 12, \"transcription\": 13, \"speaker_labels\": 13}, \"wall_clock_since_first_byte_received\": 31, \"periodic\": true}, \"audio_metrics\": {\"sampling_interval\": 16, \"accumulated\": {\"final\": true, \"end_time\": 7, \"signal_to_noise_ratio\": 18, \"speech_ratio\": 11, \"high_frequency_loss\": 17, \"direct_current_offset\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"clipping_rate\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"speech_level\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"non_speech_level\": [{\"begin\": 5, \"end\": 3, \"count\": 5}]}}, \"warnings\": [\"warnings\"]}";
215+
"{\"results\": [{\"final\": true, \"alternatives\": [{\"transcript\": \"transcript\", \"confidence\": 0, \"timestamps\": [[\"timestamps\"]], \"word_confidence\": [[\"wordConfidence\"]]}], \"keywords_result\": {\"mapKey\": [{\"normalized_text\": \"normalizedText\", \"start_time\": 9, \"end_time\": 7, \"confidence\": 0}]}, \"word_alternatives\": [{\"start_time\": 9, \"end_time\": 7, \"alternatives\": [{\"confidence\": 0, \"word\": \"word\"}]}], \"end_of_utterance\": \"end_of_data\"}], \"result_index\": 11, \"speaker_labels\": [{\"from\": 4, \"to\": 2, \"speaker\": 7, \"confidence\": 10, \"final\": true}], \"processing_metrics\": {\"processed_audio\": {\"received\": 8, \"seen_by_engine\": 12, \"transcription\": 13, \"speaker_labels\": 13}, \"wall_clock_since_first_byte_received\": 31, \"periodic\": true}, \"audio_metrics\": {\"sampling_interval\": 16, \"accumulated\": {\"final\": true, \"end_time\": 7, \"signal_to_noise_ratio\": 18, \"speech_ratio\": 11, \"high_frequency_loss\": 17, \"direct_current_offset\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"clipping_rate\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"speech_level\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"non_speech_level\": [{\"begin\": 5, \"end\": 3, \"count\": 5}]}}, \"warnings\": [\"warnings\"]}";
216216
String recognizePath = "/v1/recognize";
217217
server.enqueue(
218218
new MockResponse()
@@ -424,7 +424,7 @@ public void testUnregisterCallbackNoOptions() throws Throwable {
424424
public void testCreateJobWOptions() throws Throwable {
425425
// Register a mock response
426426
String mockResponseBody =
427-
"{\"id\": \"id\", \"status\": \"waiting\", \"created\": \"created\", \"updated\": \"updated\", \"url\": \"url\", \"user_token\": \"userToken\", \"results\": [{\"results\": [{\"final\": true, \"alternatives\": [{\"transcript\": \"transcript\", \"confidence\": 0, \"timestamps\": [\"timestamps\"], \"word_confidence\": [\"wordConfidence\"]}], \"keywords_result\": {\"mapKey\": [{\"normalized_text\": \"normalizedText\", \"start_time\": 9, \"end_time\": 7, \"confidence\": 0}]}, \"word_alternatives\": [{\"start_time\": 9, \"end_time\": 7, \"alternatives\": [{\"confidence\": 0, \"word\": \"word\"}]}], \"end_of_utterance\": \"end_of_data\"}], \"result_index\": 11, \"speaker_labels\": [{\"from\": 4, \"to\": 2, \"speaker\": 7, \"confidence\": 10, \"final\": true}], \"processing_metrics\": {\"processed_audio\": {\"received\": 8, \"seen_by_engine\": 12, \"transcription\": 13, \"speaker_labels\": 13}, \"wall_clock_since_first_byte_received\": 31, \"periodic\": true}, \"audio_metrics\": {\"sampling_interval\": 16, \"accumulated\": {\"final\": true, \"end_time\": 7, \"signal_to_noise_ratio\": 18, \"speech_ratio\": 11, \"high_frequency_loss\": 17, \"direct_current_offset\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"clipping_rate\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"speech_level\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"non_speech_level\": [{\"begin\": 5, \"end\": 3, \"count\": 5}]}}, \"warnings\": [\"warnings\"]}], \"warnings\": [\"warnings\"]}";
427+
"{\"id\": \"id\", \"status\": \"waiting\", \"created\": \"created\", \"updated\": \"updated\", \"url\": \"url\", \"user_token\": \"userToken\", \"results\": [{\"results\": [{\"final\": true, \"alternatives\": [{\"transcript\": \"transcript\", \"confidence\": 0, \"timestamps\": [[\"timestamps\"]], \"word_confidence\": [[\"wordConfidence\"]]}], \"keywords_result\": {\"mapKey\": [{\"normalized_text\": \"normalizedText\", \"start_time\": 9, \"end_time\": 7, \"confidence\": 0}]}, \"word_alternatives\": [{\"start_time\": 9, \"end_time\": 7, \"alternatives\": [{\"confidence\": 0, \"word\": \"word\"}]}], \"end_of_utterance\": \"end_of_data\"}], \"result_index\": 11, \"speaker_labels\": [{\"from\": 4, \"to\": 2, \"speaker\": 7, \"confidence\": 10, \"final\": true}], \"processing_metrics\": {\"processed_audio\": {\"received\": 8, \"seen_by_engine\": 12, \"transcription\": 13, \"speaker_labels\": 13}, \"wall_clock_since_first_byte_received\": 31, \"periodic\": true}, \"audio_metrics\": {\"sampling_interval\": 16, \"accumulated\": {\"final\": true, \"end_time\": 7, \"signal_to_noise_ratio\": 18, \"speech_ratio\": 11, \"high_frequency_loss\": 17, \"direct_current_offset\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"clipping_rate\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"speech_level\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"non_speech_level\": [{\"begin\": 5, \"end\": 3, \"count\": 5}]}}, \"warnings\": [\"warnings\"]}], \"warnings\": [\"warnings\"]}";
428428
String createJobPath = "/v1/recognitions";
429429
server.enqueue(
430430
new MockResponse()
@@ -542,7 +542,7 @@ public void testCreateJobNoOptions() throws Throwable {
542542
public void testCheckJobsWOptions() throws Throwable {
543543
// Register a mock response
544544
String mockResponseBody =
545-
"{\"recognitions\": [{\"id\": \"id\", \"status\": \"waiting\", \"created\": \"created\", \"updated\": \"updated\", \"url\": \"url\", \"user_token\": \"userToken\", \"results\": [{\"results\": [{\"final\": true, \"alternatives\": [{\"transcript\": \"transcript\", \"confidence\": 0, \"timestamps\": [\"timestamps\"], \"word_confidence\": [\"wordConfidence\"]}], \"keywords_result\": {\"mapKey\": [{\"normalized_text\": \"normalizedText\", \"start_time\": 9, \"end_time\": 7, \"confidence\": 0}]}, \"word_alternatives\": [{\"start_time\": 9, \"end_time\": 7, \"alternatives\": [{\"confidence\": 0, \"word\": \"word\"}]}], \"end_of_utterance\": \"end_of_data\"}], \"result_index\": 11, \"speaker_labels\": [{\"from\": 4, \"to\": 2, \"speaker\": 7, \"confidence\": 10, \"final\": true}], \"processing_metrics\": {\"processed_audio\": {\"received\": 8, \"seen_by_engine\": 12, \"transcription\": 13, \"speaker_labels\": 13}, \"wall_clock_since_first_byte_received\": 31, \"periodic\": true}, \"audio_metrics\": {\"sampling_interval\": 16, \"accumulated\": {\"final\": true, \"end_time\": 7, \"signal_to_noise_ratio\": 18, \"speech_ratio\": 11, \"high_frequency_loss\": 17, \"direct_current_offset\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"clipping_rate\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"speech_level\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"non_speech_level\": [{\"begin\": 5, \"end\": 3, \"count\": 5}]}}, \"warnings\": [\"warnings\"]}], \"warnings\": [\"warnings\"]}]}";
545+
"{\"recognitions\": [{\"id\": \"id\", \"status\": \"waiting\", \"created\": \"created\", \"updated\": \"updated\", \"url\": \"url\", \"user_token\": \"userToken\", \"results\": [{\"results\": [{\"final\": true, \"alternatives\": [{\"transcript\": \"transcript\", \"confidence\": 0, \"timestamps\": [[\"timestamps\"]], \"word_confidence\": [[\"wordConfidence\"]]}], \"keywords_result\": {\"mapKey\": [{\"normalized_text\": \"normalizedText\", \"start_time\": 9, \"end_time\": 7, \"confidence\": 0}]}, \"word_alternatives\": [{\"start_time\": 9, \"end_time\": 7, \"alternatives\": [{\"confidence\": 0, \"word\": \"word\"}]}], \"end_of_utterance\": \"end_of_data\"}], \"result_index\": 11, \"speaker_labels\": [{\"from\": 4, \"to\": 2, \"speaker\": 7, \"confidence\": 10, \"final\": true}], \"processing_metrics\": {\"processed_audio\": {\"received\": 8, \"seen_by_engine\": 12, \"transcription\": 13, \"speaker_labels\": 13}, \"wall_clock_since_first_byte_received\": 31, \"periodic\": true}, \"audio_metrics\": {\"sampling_interval\": 16, \"accumulated\": {\"final\": true, \"end_time\": 7, \"signal_to_noise_ratio\": 18, \"speech_ratio\": 11, \"high_frequency_loss\": 17, \"direct_current_offset\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"clipping_rate\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"speech_level\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"non_speech_level\": [{\"begin\": 5, \"end\": 3, \"count\": 5}]}}, \"warnings\": [\"warnings\"]}], \"warnings\": [\"warnings\"]}]}";
546546
String checkJobsPath = "/v1/recognitions";
547547
server.enqueue(
548548
new MockResponse()
@@ -587,7 +587,7 @@ public void testCheckJobsWRetries() throws Throwable {
587587
public void testCheckJobWOptions() throws Throwable {
588588
// Register a mock response
589589
String mockResponseBody =
590-
"{\"id\": \"id\", \"status\": \"waiting\", \"created\": \"created\", \"updated\": \"updated\", \"url\": \"url\", \"user_token\": \"userToken\", \"results\": [{\"results\": [{\"final\": true, \"alternatives\": [{\"transcript\": \"transcript\", \"confidence\": 0, \"timestamps\": [\"timestamps\"], \"word_confidence\": [\"wordConfidence\"]}], \"keywords_result\": {\"mapKey\": [{\"normalized_text\": \"normalizedText\", \"start_time\": 9, \"end_time\": 7, \"confidence\": 0}]}, \"word_alternatives\": [{\"start_time\": 9, \"end_time\": 7, \"alternatives\": [{\"confidence\": 0, \"word\": \"word\"}]}], \"end_of_utterance\": \"end_of_data\"}], \"result_index\": 11, \"speaker_labels\": [{\"from\": 4, \"to\": 2, \"speaker\": 7, \"confidence\": 10, \"final\": true}], \"processing_metrics\": {\"processed_audio\": {\"received\": 8, \"seen_by_engine\": 12, \"transcription\": 13, \"speaker_labels\": 13}, \"wall_clock_since_first_byte_received\": 31, \"periodic\": true}, \"audio_metrics\": {\"sampling_interval\": 16, \"accumulated\": {\"final\": true, \"end_time\": 7, \"signal_to_noise_ratio\": 18, \"speech_ratio\": 11, \"high_frequency_loss\": 17, \"direct_current_offset\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"clipping_rate\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"speech_level\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"non_speech_level\": [{\"begin\": 5, \"end\": 3, \"count\": 5}]}}, \"warnings\": [\"warnings\"]}], \"warnings\": [\"warnings\"]}";
590+
"{\"id\": \"id\", \"status\": \"waiting\", \"created\": \"created\", \"updated\": \"updated\", \"url\": \"url\", \"user_token\": \"userToken\", \"results\": [{\"results\": [{\"final\": true, \"alternatives\": [{\"transcript\": \"transcript\", \"confidence\": 0, \"timestamps\": [[\"timestamps\"]], \"word_confidence\": [[\"wordConfidence\"]]}], \"keywords_result\": {\"mapKey\": [{\"normalized_text\": \"normalizedText\", \"start_time\": 9, \"end_time\": 7, \"confidence\": 0}]}, \"word_alternatives\": [{\"start_time\": 9, \"end_time\": 7, \"alternatives\": [{\"confidence\": 0, \"word\": \"word\"}]}], \"end_of_utterance\": \"end_of_data\"}], \"result_index\": 11, \"speaker_labels\": [{\"from\": 4, \"to\": 2, \"speaker\": 7, \"confidence\": 10, \"final\": true}], \"processing_metrics\": {\"processed_audio\": {\"received\": 8, \"seen_by_engine\": 12, \"transcription\": 13, \"speaker_labels\": 13}, \"wall_clock_since_first_byte_received\": 31, \"periodic\": true}, \"audio_metrics\": {\"sampling_interval\": 16, \"accumulated\": {\"final\": true, \"end_time\": 7, \"signal_to_noise_ratio\": 18, \"speech_ratio\": 11, \"high_frequency_loss\": 17, \"direct_current_offset\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"clipping_rate\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"speech_level\": [{\"begin\": 5, \"end\": 3, \"count\": 5}], \"non_speech_level\": [{\"begin\": 5, \"end\": 3, \"count\": 5}]}}, \"warnings\": [\"warnings\"]}], \"warnings\": [\"warnings\"]}";
591591
String checkJobPath = "/v1/recognitions/testString";
592592
server.enqueue(
593593
new MockResponse()

0 commit comments

Comments
 (0)