Skip to content

Commit 5cb5238

Browse files
feat(stt): add speechBeginEvent param to recognize func
1 parent 45ec51d commit 5cb5238

File tree

9 files changed: +260 -112 lines changed

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/SpeechToText.java

+114-60
Large diffs are not rendered by default.

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/Corpus.java

+4-3
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* (C) Copyright IBM Corp. 2016, 2023.
2+
* (C) Copyright IBM Corp. 2024.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
55
* the License. You may obtain a copy of the License at
@@ -73,8 +73,9 @@ public Long getTotalWords() {
7373
/**
7474
* Gets the outOfVocabularyWords.
7575
*
76-
* <p>_For custom models that are based on previous-generation models_, the number of OOV words
77-
* extracted from the corpus. The value is `0` while the corpus is being processed.
76+
* <p>_For custom models that are based on large speech models and previous-generation models_,
77+
* the number of OOV words extracted from the corpus. The value is `0` while the corpus is being
78+
* processed.
7879
*
7980
* <p>_For custom models that are based on next-generation models_, no OOV words are extracted
8081
* from corpora, so the value is always `0`.

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/CreateJobOptions.java

+33-18
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* (C) Copyright IBM Corp. 2018, 2024.
2+
* (C) Copyright IBM Corp. 2024.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
55
* the License. You may obtain a copy of the License at
@@ -51,6 +51,8 @@ public interface Model {
5151
String DE_DE_NARROWBANDMODEL = "de-DE_NarrowbandModel";
5252
/** de-DE_Telephony. */
5353
String DE_DE_TELEPHONY = "de-DE_Telephony";
54+
/** en-AU. */
55+
String EN_AU = "en-AU";
5456
/** en-AU_BroadbandModel. */
5557
String EN_AU_BROADBANDMODEL = "en-AU_BroadbandModel";
5658
/** en-AU_Multimedia. */
@@ -59,8 +61,12 @@ public interface Model {
5961
String EN_AU_NARROWBANDMODEL = "en-AU_NarrowbandModel";
6062
/** en-AU_Telephony. */
6163
String EN_AU_TELEPHONY = "en-AU_Telephony";
64+
/** en-IN. */
65+
String EN_IN = "en-IN";
6266
/** en-IN_Telephony. */
6367
String EN_IN_TELEPHONY = "en-IN_Telephony";
68+
/** en-GB. */
69+
String EN_GB = "en-GB";
6470
/** en-GB_BroadbandModel. */
6571
String EN_GB_BROADBANDMODEL = "en-GB_BroadbandModel";
6672
/** en-GB_Multimedia. */
@@ -69,6 +75,8 @@ public interface Model {
6975
String EN_GB_NARROWBANDMODEL = "en-GB_NarrowbandModel";
7076
/** en-GB_Telephony. */
7177
String EN_GB_TELEPHONY = "en-GB_Telephony";
78+
/** en-US. */
79+
String EN_US = "en-US";
7280
/** en-US_BroadbandModel. */
7381
String EN_US_BROADBANDMODEL = "en-US_BroadbandModel";
7482
/** en-US_Multimedia. */
@@ -111,6 +119,8 @@ public interface Model {
111119
String ES_PE_BROADBANDMODEL = "es-PE_BroadbandModel";
112120
/** es-PE_NarrowbandModel. */
113121
String ES_PE_NARROWBANDMODEL = "es-PE_NarrowbandModel";
122+
/** fr-CA. */
123+
String FR_CA = "fr-CA";
114124
/** fr-CA_BroadbandModel. */
115125
String FR_CA_BROADBANDMODEL = "fr-CA_BroadbandModel";
116126
/** fr-CA_Multimedia. */
@@ -119,6 +129,8 @@ public interface Model {
119129
String FR_CA_NARROWBANDMODEL = "fr-CA_NarrowbandModel";
120130
/** fr-CA_Telephony. */
121131
String FR_CA_TELEPHONY = "fr-CA_Telephony";
132+
/** fr-FR. */
133+
String FR_FR = "fr-FR";
122134
/** fr-FR_BroadbandModel. */
123135
String FR_FR_BROADBANDMODEL = "fr-FR_BroadbandModel";
124136
/** fr-FR_Multimedia. */
@@ -137,6 +149,8 @@ public interface Model {
137149
String IT_IT_MULTIMEDIA = "it-IT_Multimedia";
138150
/** it-IT_Telephony. */
139151
String IT_IT_TELEPHONY = "it-IT_Telephony";
152+
/** ja-JP. */
153+
String JA_JP = "ja-JP";
140154
/** ja-JP_BroadbandModel. */
141155
String JA_JP_BROADBANDMODEL = "ja-JP_BroadbandModel";
142156
/** ja-JP_Multimedia. */
@@ -952,9 +966,9 @@ public String baseModelVersion() {
952966
* custom language model compared to those from the base model for the current request.
953967
*
954968
* <p>Specify a value between 0.0 and 1.0. Unless a different customization weight was specified
955-
* for the custom model when the model was trained, the default value is: * 0.3 for
956-
* previous-generation models * 0.2 for most next-generation models * 0.1 for next-generation
957-
* English and Japanese models
969+
* for the custom model when the model was trained, the default value is: * 0.5 for large speech
970+
* models * 0.3 for previous-generation models * 0.2 for most next-generation models * 0.1 for
971+
* next-generation English and Japanese models
958972
*
959973
* <p>A customization weight that you specify overrides a weight that was specified when the
960974
* custom model was trained. The default value yields the best performance in general. Assign a
@@ -1117,8 +1131,8 @@ public Boolean smartFormatting() {
11171131
/**
11181132
* Gets the smartFormattingVersion.
11191133
*
1120-
* <p>Smart formatting version is for next-generation models and that is supported in US English,
1121-
* Brazilian Portuguese, French and German languages.
1134+
* <p>Smart formatting version for large speech models and next-generation models is supported in
1135+
* US English, Brazilian Portuguese, French, German, Spanish and French Canadian languages.
11221136
*
11231137
* @return the smartFormattingVersion
11241138
*/
@@ -1135,8 +1149,8 @@ public Long smartFormattingVersion() {
11351149
* of whether you specify `false` for the parameter. * _For previous-generation models,_ the
11361150
* parameter can be used with Australian English, US English, German, Japanese, Korean, and
11371151
* Spanish (both broadband and narrowband models) and UK English (narrowband model) transcription
1138-
* only. * _For next-generation models,_ the parameter can be used with Czech, English
1139-
* (Australian, Indian, UK, and US), German, Japanese, Korean, and Spanish transcription only.
1152+
* only. * _For large speech models and next-generation models,_ the parameter can be used with
1153+
* all available languages.
11401154
*
11411155
* <p>See [Speaker
11421156
* labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-speaker-labels).
@@ -1310,8 +1324,8 @@ public Boolean splitTranscriptAtPhraseEnd() {
13101324
* <p>The values increase on a monotonic curve. Specifying one or two decimal places of precision
13111325
* (for example, `0.55`) is typically more than sufficient.
13121326
*
1313-
* <p>The parameter is supported with all next-generation models and with most previous-generation
1314-
* models. See [Speech detector
1327+
* <p>The parameter is supported with all large speech models, next-generation models and with
1328+
* most previous-generation models. See [Speech detector
13151329
* sensitivity](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-sensitivity)
13161330
* and [Language model
13171331
* support](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-support).
@@ -1336,8 +1350,8 @@ public Float speechDetectorSensitivity() {
13361350
* <p>The values increase on a monotonic curve. Specifying one or two decimal places of precision
13371351
* (for example, `0.55`) is typically more than sufficient.
13381352
*
1339-
* <p>The parameter is supported with all next-generation models and with most previous-generation
1340-
* models. See [Background audio
1353+
* <p>The parameter is supported with all large speech models, next-generation models and with
1354+
* most previous-generation models. See [Background audio
13411355
* suppression](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-suppression)
13421356
* and [Language model
13431357
* support](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-support).
@@ -1357,9 +1371,9 @@ public Float backgroundAudioSuppression() {
13571371
* parameter causes the models to produce results even more quickly, though the results might be
13581372
* less accurate when the parameter is used.
13591373
*
1360-
* <p>The parameter is not available for previous-generation `Broadband` and `Narrowband` models.
1361-
* It is available for most next-generation models. * For a list of next-generation models that
1362-
* support low latency, see [Supported next-generation language
1374+
* <p>The parameter is not available for large speech models and previous-generation `Broadband`
1375+
* and `Narrowband` models. It is available for most next-generation models. * For a list of
1376+
* next-generation models that support low latency, see [Supported next-generation language
13631377
* models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng#models-ng-supported).
13641378
* * For more information about the `low_latency` parameter, see [Low
13651379
* latency](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-interim#low-latency).
@@ -1373,9 +1387,10 @@ public Boolean lowLatency() {
13731387
/**
13741388
* Gets the characterInsertionBias.
13751389
*
1376-
* <p>For next-generation models, an indication of whether the service is biased to recognize
1377-
* shorter or longer strings of characters when developing transcription hypotheses. By default,
1378-
* the service is optimized to produce the best balance of strings of different lengths.
1390+
* <p>For large speech models and next-generation models, an indication of whether the service is
1391+
* biased to recognize shorter or longer strings of characters when developing transcription
1392+
* hypotheses. By default, the service is optimized to produce the best balance of strings of
1393+
* different lengths.
13791394
*
13801395
* <p>The default bias is 0.0. The allowable range of values is -1.0 to 1.0. * Negative values
13811396
* bias the service to favor hypotheses with shorter strings of characters. * Positive values bias

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/CreateLanguageModelOptions.java

+15-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* (C) Copyright IBM Corp. 2018, 2023.
2+
* (C) Copyright IBM Corp. 2024.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
55
* the License. You may obtain a copy of the License at
@@ -39,6 +39,8 @@ public interface BaseModelName {
3939
String DE_DE_NARROWBANDMODEL = "de-DE_NarrowbandModel";
4040
/** de-DE_Telephony. */
4141
String DE_DE_TELEPHONY = "de-DE_Telephony";
42+
/** en-AU. */
43+
String EN_AU = "en-AU";
4244
/** en-AU_BroadbandModel. */
4345
String EN_AU_BROADBANDMODEL = "en-AU_BroadbandModel";
4446
/** en-AU_Multimedia. */
@@ -47,6 +49,8 @@ public interface BaseModelName {
4749
String EN_AU_NARROWBANDMODEL = "en-AU_NarrowbandModel";
4850
/** en-AU_Telephony. */
4951
String EN_AU_TELEPHONY = "en-AU_Telephony";
52+
/** en-GB. */
53+
String EN_GB = "en-GB";
5054
/** en-GB_BroadbandModel. */
5155
String EN_GB_BROADBANDMODEL = "en-GB_BroadbandModel";
5256
/** en-GB_Multimedia. */
@@ -55,8 +59,12 @@ public interface BaseModelName {
5559
String EN_GB_NARROWBANDMODEL = "en-GB_NarrowbandModel";
5660
/** en-GB_Telephony. */
5761
String EN_GB_TELEPHONY = "en-GB_Telephony";
62+
/** en-IN. */
63+
String EN_IN = "en-IN";
5864
/** en-IN_Telephony. */
5965
String EN_IN_TELEPHONY = "en-IN_Telephony";
66+
/** en-US. */
67+
String EN_US = "en-US";
6068
/** en-US_BroadbandModel. */
6169
String EN_US_BROADBANDMODEL = "en-US_BroadbandModel";
6270
/** en-US_Multimedia. */
@@ -99,6 +107,8 @@ public interface BaseModelName {
99107
String ES_PE_BROADBANDMODEL = "es-PE_BroadbandModel";
100108
/** es-PE_NarrowbandModel. */
101109
String ES_PE_NARROWBANDMODEL = "es-PE_NarrowbandModel";
110+
/** fr-CA. */
111+
String FR_CA = "fr-CA";
102112
/** fr-CA_BroadbandModel. */
103113
String FR_CA_BROADBANDMODEL = "fr-CA_BroadbandModel";
104114
/** fr-CA_Multimedia. */
@@ -107,6 +117,8 @@ public interface BaseModelName {
107117
String FR_CA_NARROWBANDMODEL = "fr-CA_NarrowbandModel";
108118
/** fr-CA_Telephony. */
109119
String FR_CA_TELEPHONY = "fr-CA_Telephony";
120+
/** fr-FR. */
121+
String FR_FR = "fr-FR";
110122
/** fr-FR_BroadbandModel. */
111123
String FR_FR_BROADBANDMODEL = "fr-FR_BroadbandModel";
112124
/** fr-FR_Multimedia. */
@@ -125,6 +137,8 @@ public interface BaseModelName {
125137
String IT_IT_MULTIMEDIA = "it-IT_Multimedia";
126138
/** it-IT_Telephony. */
127139
String IT_IT_TELEPHONY = "it-IT_Telephony";
140+
/** ja-JP. */
141+
String JA_JP = "ja-JP";
128142
/** ja-JP_BroadbandModel. */
129143
String JA_JP_BROADBANDMODEL = "ja-JP_BroadbandModel";
130144
/** ja-JP_Multimedia. */

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/GetModelOptions.java

+15-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* (C) Copyright IBM Corp. 2018, 2023.
2+
* (C) Copyright IBM Corp. 2024.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
55
* the License. You may obtain a copy of the License at
@@ -36,6 +36,8 @@ public interface ModelId {
3636
String DE_DE_NARROWBANDMODEL = "de-DE_NarrowbandModel";
3737
/** de-DE_Telephony. */
3838
String DE_DE_TELEPHONY = "de-DE_Telephony";
39+
/** en-AU. */
40+
String EN_AU = "en-AU";
3941
/** en-AU_BroadbandModel. */
4042
String EN_AU_BROADBANDMODEL = "en-AU_BroadbandModel";
4143
/** en-AU_Multimedia. */
@@ -44,6 +46,8 @@ public interface ModelId {
4446
String EN_AU_NARROWBANDMODEL = "en-AU_NarrowbandModel";
4547
/** en-AU_Telephony. */
4648
String EN_AU_TELEPHONY = "en-AU_Telephony";
49+
/** en-GB. */
50+
String EN_GB = "en-GB";
4751
/** en-GB_BroadbandModel. */
4852
String EN_GB_BROADBANDMODEL = "en-GB_BroadbandModel";
4953
/** en-GB_Multimedia. */
@@ -52,8 +56,12 @@ public interface ModelId {
5256
String EN_GB_NARROWBANDMODEL = "en-GB_NarrowbandModel";
5357
/** en-GB_Telephony. */
5458
String EN_GB_TELEPHONY = "en-GB_Telephony";
59+
/** en-IN. */
60+
String EN_IN = "en-IN";
5561
/** en-IN_Telephony. */
5662
String EN_IN_TELEPHONY = "en-IN_Telephony";
63+
/** en-US. */
64+
String EN_US = "en-US";
5765
/** en-US_BroadbandModel. */
5866
String EN_US_BROADBANDMODEL = "en-US_BroadbandModel";
5967
/** en-US_Multimedia. */
@@ -96,6 +104,8 @@ public interface ModelId {
96104
String ES_PE_BROADBANDMODEL = "es-PE_BroadbandModel";
97105
/** es-PE_NarrowbandModel. */
98106
String ES_PE_NARROWBANDMODEL = "es-PE_NarrowbandModel";
107+
/** fr-CA. */
108+
String FR_CA = "fr-CA";
99109
/** fr-CA_BroadbandModel. */
100110
String FR_CA_BROADBANDMODEL = "fr-CA_BroadbandModel";
101111
/** fr-CA_Multimedia. */
@@ -104,6 +114,8 @@ public interface ModelId {
104114
String FR_CA_NARROWBANDMODEL = "fr-CA_NarrowbandModel";
105115
/** fr-CA_Telephony. */
106116
String FR_CA_TELEPHONY = "fr-CA_Telephony";
117+
/** fr-FR. */
118+
String FR_FR = "fr-FR";
107119
/** fr-FR_BroadbandModel. */
108120
String FR_FR_BROADBANDMODEL = "fr-FR_BroadbandModel";
109121
/** fr-FR_Multimedia. */
@@ -122,6 +134,8 @@ public interface ModelId {
122134
String IT_IT_MULTIMEDIA = "it-IT_Multimedia";
123135
/** it-IT_Telephony. */
124136
String IT_IT_TELEPHONY = "it-IT_Telephony";
137+
/** ja-JP. */
138+
String JA_JP = "ja-JP";
125139
/** ja-JP_BroadbandModel. */
126140
String JA_JP_BROADBANDMODEL = "ja-JP_BroadbandModel";
127141
/** ja-JP_Multimedia. */

0 commit comments

Comments
 (0)