Skip to content

Commit 62e4f8e

Browse files
committed
feat(speech-to-text-v1): supportedFeatures: customAcousticModel property added & update comments
1 parent b52d05d commit 62e4f8e

15 files changed

+385
-225
lines changed

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/SpeechToText.java

+117-75
Large diffs are not rendered by default.

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/AcousticModel.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* (C) Copyright IBM Corp. 2018, 2021.
2+
* (C) Copyright IBM Corp. 2022.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
55
* the License. You may obtain a copy of the License at
@@ -116,8 +116,8 @@ public String getLanguage() {
116116
*
117117
* <p>A list of the available versions of the custom acoustic model. Each element of the array
118118
* indicates a version of the base model with which the custom model can be used. Multiple
119-
* versions exist only if the custom model has been upgraded; otherwise, only a single version is
120-
* shown.
119+
* versions exist only if the custom model has been upgraded to a new version of its base model.
120+
* Otherwise, only a single version is shown.
121121
*
122122
* @return the versions
123123
*/

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/CreateAcousticModelOptions.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* (C) Copyright IBM Corp. 2018, 2021.
2+
* (C) Copyright IBM Corp. 2022.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
55
* the License. You may obtain a copy of the License at
@@ -24,7 +24,7 @@ public class CreateAcousticModelOptions extends GenericModel {
2424
*
2525
* <p>To determine whether a base model supports acoustic model customization, refer to [Language
2626
* support for
27-
* customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-custom-support#custom-language-support).
27+
* customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-custom-support).
2828
*/
2929
public interface BaseModelName {
3030
/** ar-AR_BroadbandModel. */
@@ -221,7 +221,7 @@ public String name() {
221221
*
222222
* <p>To determine whether a base model supports acoustic model customization, refer to [Language
223223
* support for
224-
* customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-custom-support#custom-language-support).
224+
* customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-custom-support).
225225
*
226226
* @return the baseModelName
227227
*/

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/CreateJobOptions.java

+54-32
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* (C) Copyright IBM Corp. 2018, 2021.
2+
* (C) Copyright IBM Corp. 2022.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
55
* the License. You may obtain a copy of the License at
@@ -25,11 +25,9 @@ public class CreateJobOptions extends GenericModel {
2525

2626
/**
2727
* The identifier of the model that is to be used for the recognition request. (**Note:** The
28-
* model `ar-AR_BroadbandModel` is deprecated; use `ar-MS_BroadbandModel` instead.) See
29-
* [Previous-generation languages and
30-
* models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models) and
31-
* [Next-generation languages and
32-
* models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng).
28+
* model `ar-AR_BroadbandModel` is deprecated; use `ar-MS_BroadbandModel` instead.) See [Using a
29+
* model for speech
30+
* recognition](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-use).
3331
*/
3432
public interface Model {
3533
/** ar-AR_BroadbandModel. */
@@ -38,6 +36,8 @@ public interface Model {
3836
String AR_MS_BROADBANDMODEL = "ar-MS_BroadbandModel";
3937
/** ar-MS_Telephony. */
4038
String AR_MS_TELEPHONY = "ar-MS_Telephony";
39+
/** cs-CZ_Telephony. */
40+
String CS_CZ_TELEPHONY = "cs-CZ_Telephony";
4141
/** de-DE_BroadbandModel. */
4242
String DE_DE_BROADBANDMODEL = "de-DE_BroadbandModel";
4343
/** de-DE_NarrowbandModel. */
@@ -46,6 +46,8 @@ public interface Model {
4646
String DE_DE_TELEPHONY = "de-DE_Telephony";
4747
/** en-AU_BroadbandModel. */
4848
String EN_AU_BROADBANDMODEL = "en-AU_BroadbandModel";
49+
/** en-AU_Multimedia. */
50+
String EN_AU_MULTIMEDIA = "en-AU_Multimedia";
4951
/** en-AU_NarrowbandModel. */
5052
String EN_AU_NARROWBANDMODEL = "en-AU_NarrowbandModel";
5153
/** en-AU_Telephony. */
@@ -54,6 +56,8 @@ public interface Model {
5456
String EN_IN_TELEPHONY = "en-IN_Telephony";
5557
/** en-GB_BroadbandModel. */
5658
String EN_GB_BROADBANDMODEL = "en-GB_BroadbandModel";
59+
/** en-GB_Multimedia. */
60+
String EN_GB_MULTIMEDIA = "en-GB_Multimedia";
5761
/** en-GB_NarrowbandModel. */
5862
String EN_GB_NARROWBANDMODEL = "en-GB_NarrowbandModel";
5963
/** en-GB_Telephony. */
@@ -68,6 +72,8 @@ public interface Model {
6872
String EN_US_SHORTFORM_NARROWBANDMODEL = "en-US_ShortForm_NarrowbandModel";
6973
/** en-US_Telephony. */
7074
String EN_US_TELEPHONY = "en-US_Telephony";
75+
/** en-WW_Medical_Telephony. */
76+
String EN_WW_MEDICAL_TELEPHONY = "en-WW_Medical_Telephony";
7177
/** es-AR_BroadbandModel. */
7278
String ES_AR_BROADBANDMODEL = "es-AR_BroadbandModel";
7379
/** es-AR_NarrowbandModel. */
@@ -84,8 +90,12 @@ public interface Model {
8490
String ES_ES_BROADBANDMODEL = "es-ES_BroadbandModel";
8591
/** es-ES_NarrowbandModel. */
8692
String ES_ES_NARROWBANDMODEL = "es-ES_NarrowbandModel";
93+
/** es-ES_Multimedia. */
94+
String ES_ES_MULTIMEDIA = "es-ES_Multimedia";
8795
/** es-ES_Telephony. */
8896
String ES_ES_TELEPHONY = "es-ES_Telephony";
97+
/** es-LA_Telephony. */
98+
String ES_LA_TELEPHONY = "es-LA_Telephony";
8999
/** es-MX_BroadbandModel. */
90100
String ES_MX_BROADBANDMODEL = "es-MX_BroadbandModel";
91101
/** es-MX_NarrowbandModel. */
@@ -136,6 +146,8 @@ public interface Model {
136146
String NL_NL_BROADBANDMODEL = "nl-NL_BroadbandModel";
137147
/** nl-NL_NarrowbandModel. */
138148
String NL_NL_NARROWBANDMODEL = "nl-NL_NarrowbandModel";
149+
/** nl-NL_Telephony. */
150+
String NL_NL_TELEPHONY = "nl-NL_Telephony";
139151
/** pt-BR_BroadbandModel. */
140152
String PT_BR_BROADBANDMODEL = "pt-BR_BroadbandModel";
141153
/** pt-BR_NarrowbandModel. */
@@ -146,6 +158,8 @@ public interface Model {
146158
String ZH_CN_BROADBANDMODEL = "zh-CN_BroadbandModel";
147159
/** zh-CN_NarrowbandModel. */
148160
String ZH_CN_NARROWBANDMODEL = "zh-CN_NarrowbandModel";
161+
/** zh-CN_Telephony. */
162+
String ZH_CN_TELEPHONY = "zh-CN_Telephony";
149163
}
150164

151165
/**
@@ -752,11 +766,9 @@ public String contentType() {
752766
* Gets the model.
753767
*
754768
* <p>The identifier of the model that is to be used for the recognition request. (**Note:** The
755-
* model `ar-AR_BroadbandModel` is deprecated; use `ar-MS_BroadbandModel` instead.) See
756-
* [Previous-generation languages and
757-
* models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models) and
758-
* [Next-generation languages and
759-
* models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng).
769+
* model `ar-AR_BroadbandModel` is deprecated; use `ar-MS_BroadbandModel` instead.) See [Using a
770+
* model for speech
771+
* recognition](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-use).
760772
*
761773
* @return the model
762774
*/
@@ -1020,7 +1032,9 @@ public Boolean timestamps() {
10201032
*
10211033
* <p>If `true`, the service filters profanity from all output except for keyword results by
10221034
* replacing inappropriate words with a series of asterisks. Set the parameter to `false` to
1023-
* return results with no censoring. Applies to US English and Japanese transcription only. See
1035+
* return results with no censoring.
1036+
*
1037+
* <p>**Note:** The parameter can be used with US English and Japanese transcription only. See
10241038
* [Profanity
10251039
* filtering](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-formatting#profanity-filtering).
10261040
*
@@ -1038,7 +1052,7 @@ public Boolean profanityFilter() {
10381052
* final transcript of a recognition request. For US English, the service also converts certain
10391053
* keyword strings to punctuation symbols. By default, the service performs no smart formatting.
10401054
*
1041-
* <p>**Beta:** The parameter is beta functionality. Applies to US English, Japanese, and Spanish
1055+
* <p>**Note:** The parameter can be used with US English, Japanese, and Spanish (all dialects)
10421056
* transcription only.
10431057
*
10441058
* <p>See [Smart
@@ -1056,16 +1070,13 @@ public Boolean smartFormatting() {
10561070
* <p>If `true`, the response includes labels that identify which words were spoken by which
10571071
* participants in a multi-person exchange. By default, the service returns no speaker labels.
10581072
* Setting `speaker_labels` to `true` forces the `timestamps` parameter to be `true`, regardless
1059-
* of whether you specify `false` for the parameter.
1060-
*
1061-
* <p>**Beta:** The parameter is beta functionality. * For previous-generation models, the
1062-
* parameter can be used for Australian English, US English, German, Japanese, Korean, and Spanish
1063-
* (both broadband and narrowband models) and UK English (narrowband model) transcription only. *
1064-
* For next-generation models, the parameter can be used for English (Australian, Indian, UK, and
1065-
* US), German, Japanese, Korean, and Spanish transcription only.
1073+
* of whether you specify `false` for the parameter. _For previous-generation models,_ the
1074+
* parameter can be used with Australian English, US English, German, Japanese, Korean, and
1075+
* Spanish (both broadband and narrowband models) and UK English (narrowband model) transcription
1076+
* only. _For next-generation models,_ the parameter can be used with Czech, English
1077+
* (Australian, Indian, UK, and US), German, Japanese, Korean, and Spanish transcription only.
10661078
*
1067-
* <p>Restrictions and limitations apply to the use of speaker labels for both types of models.
1068-
* See [Speaker
1079+
* <p>See [Speaker
10691080
* labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-speaker-labels).
10701081
*
10711082
* @return the speakerLabels
@@ -1096,8 +1107,6 @@ public String customizationId() {
10961107
* that are recognized by the specified grammar; it does not recognize other custom words from the
10971108
* model's words resource.
10981109
*
1099-
* <p>**Beta:** The parameter is beta functionality.
1100-
*
11011110
* <p>See [Using a grammar for speech
11021111
* recognition](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-grammarUse).
11031112
*
@@ -1120,8 +1129,8 @@ public String grammarName() {
11201129
* disables keyword spotting (ignores the `keywords` and `keywords_threshold` parameters) and
11211130
* returns only a single final transcript (forces the `max_alternatives` parameter to be `1`).
11221131
*
1123-
* <p>**Beta:** The parameter is beta functionality. Applies to US English, Japanese, and Korean
1124-
* transcription only.
1132+
* <p>**Note:** The parameter can be used with US English, Japanese, and Korean transcription
1133+
* only.
11251134
*
11261135
* <p>See [Numeric
11271136
* redaction](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-formatting#numeric-redaction).
@@ -1222,8 +1231,11 @@ public Double endOfPhraseSilenceTime() {
12221231
* semantic features of the input, for example, at the conclusion of meaningful phrases such as
12231232
* sentences. The service bases its understanding of semantic features on the base language model
12241233
* that you use with a request. Custom language models and grammars can also influence how and
1225-
* where the service splits a transcript. By default, the service splits transcripts based solely
1226-
* on the pause interval.
1234+
* where the service splits a transcript.
1235+
*
1236+
* <p>By default, the service splits transcripts based solely on the pause interval. If the
1237+
* parameters are used together on the same request, `end_of_phrase_silence_time` has precedence
1238+
* over `split_transcript_at_phrase_end`.
12271239
*
12281240
* <p>See [Split transcript at phrase
12291241
* end](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-parsing#split-transcript).
@@ -1246,8 +1258,13 @@ public Boolean splitTranscriptAtPhraseEnd() {
12461258
* * 0.5 (the default) provides a reasonable compromise for the level of sensitivity. * 1.0
12471259
* suppresses no audio (speech detection sensitivity is disabled).
12481260
*
1249-
* <p>The values increase on a monotonic curve. See [Speech detector
1250-
* sensitivity](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-sensitivity).
1261+
* <p>The values increase on a monotonic curve.
1262+
*
1263+
* <p>The parameter is supported with all next-generation models and with most previous-generation
1264+
* models. See [Speech detector
1265+
* sensitivity](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-sensitivity)
1266+
* and [Language model
1267+
* support](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-support).
12511268
*
12521269
* @return the speechDetectorSensitivity
12531270
*/
@@ -1266,8 +1283,13 @@ public Float speechDetectorSensitivity() {
12661283
* (background audio suppression is disabled). * 0.5 provides a reasonable level of audio
12671284
* suppression for general usage. * 1.0 suppresses all audio (no audio is transcribed).
12681285
*
1269-
* <p>The values increase on a monotonic curve. See [Background audio
1270-
* suppression](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-suppression).
1286+
* <p>The values increase on a monotonic curve.
1287+
*
1288+
* <p>The parameter is supported with all next-generation models and with most previous-generation
1289+
* models. See [Background audio
1290+
* suppression](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-suppression)
1291+
* and [Language model
1292+
* support](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-support).
12711293
*
12721294
* @return the backgroundAudioSuppression
12731295
*/

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/CreateLanguageModelOptions.java

+30-15
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* (C) Copyright IBM Corp. 2018, 2021.
2+
* (C) Copyright IBM Corp. 2022.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
55
* the License. You may obtain a copy of the License at
@@ -24,11 +24,13 @@ public class CreateLanguageModelOptions extends GenericModel {
2424
* <p>To determine whether a base model supports language model customization, use the [Get a
2525
* model](#getmodel) method and check that the attribute `custom_language_model` is set to `true`.
2626
* You can also refer to [Language support for
27-
* customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-custom-support#custom-language-support).
27+
* customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-custom-support).
2828
*/
2929
public interface BaseModelName {
3030
/** ar-MS_Telephony. */
3131
String AR_MS_TELEPHONY = "ar-MS_Telephony";
32+
/** cs-CZ_Telephony. */
33+
String CS_CZ_TELEPHONY = "cs-CZ_Telephony";
3234
/** de-DE_BroadbandModel. */
3335
String DE_DE_BROADBANDMODEL = "de-DE_BroadbandModel";
3436
/** de-DE_NarrowbandModel. */
@@ -37,12 +39,16 @@ public interface BaseModelName {
3739
String DE_DE_TELEPHONY = "de-DE_Telephony";
3840
/** en-AU_BroadbandModel. */
3941
String EN_AU_BROADBANDMODEL = "en-AU_BroadbandModel";
42+
/** en-AU_Multimedia. */
43+
String EN_AU_MULTIMEDIA = "en-AU_Multimedia";
4044
/** en-AU_NarrowbandModel. */
4145
String EN_AU_NARROWBANDMODEL = "en-AU_NarrowbandModel";
4246
/** en-AU_Telephony. */
4347
String EN_AU_TELEPHONY = "en-AU_Telephony";
4448
/** en-GB_BroadbandModel. */
4549
String EN_GB_BROADBANDMODEL = "en-GB_BroadbandModel";
50+
/** en-GB_Multimedia. */
51+
String EN_GB_MULTIMEDIA = "en-GB_Multimedia";
4652
/** en-GB_NarrowbandModel. */
4753
String EN_GB_NARROWBANDMODEL = "en-GB_NarrowbandModel";
4854
/** en-GB_Telephony. */
@@ -59,6 +65,8 @@ public interface BaseModelName {
5965
String EN_US_SHORTFORM_NARROWBANDMODEL = "en-US_ShortForm_NarrowbandModel";
6066
/** en-US_Telephony. */
6167
String EN_US_TELEPHONY = "en-US_Telephony";
68+
/** en-WW_Medical_Telephony. */
69+
String EN_WW_MEDICAL_TELEPHONY = "en-WW_Medical_Telephony";
6270
/** es-AR_BroadbandModel. */
6371
String ES_AR_BROADBANDMODEL = "es-AR_BroadbandModel";
6472
/** es-AR_NarrowbandModel. */
@@ -75,8 +83,12 @@ public interface BaseModelName {
7583
String ES_ES_BROADBANDMODEL = "es-ES_BroadbandModel";
7684
/** es-ES_NarrowbandModel. */
7785
String ES_ES_NARROWBANDMODEL = "es-ES_NarrowbandModel";
86+
/** es-ES_Multimedia. */
87+
String ES_ES_MULTIMEDIA = "es-ES_Multimedia";
7888
/** es-ES_Telephony. */
7989
String ES_ES_TELEPHONY = "es-ES_Telephony";
90+
/** es-LA_Telephony. */
91+
String ES_LA_TELEPHONY = "es-LA_Telephony";
8092
/** es-MX_BroadbandModel. */
8193
String ES_MX_BROADBANDMODEL = "es-MX_BroadbandModel";
8294
/** es-MX_NarrowbandModel. */
@@ -127,12 +139,16 @@ public interface BaseModelName {
127139
String NL_NL_BROADBANDMODEL = "nl-NL_BroadbandModel";
128140
/** nl-NL_NarrowbandModel. */
129141
String NL_NL_NARROWBANDMODEL = "nl-NL_NarrowbandModel";
142+
/** nl-NL_Telephony. */
143+
String NL_NL_TELEPHONY = "nl-NL_Telephony";
130144
/** pt-BR_BroadbandModel. */
131145
String PT_BR_BROADBANDMODEL = "pt-BR_BroadbandModel";
132146
/** pt-BR_NarrowbandModel. */
133147
String PT_BR_NARROWBANDMODEL = "pt-BR_NarrowbandModel";
134148
/** pt-BR_Telephony. */
135149
String PT_BR_TELEPHONY = "pt-BR_Telephony";
150+
/** zh-CN_Telephony. */
151+
String ZH_CN_TELEPHONY = "zh-CN_Telephony";
136152
}
137153

138154
protected String name;
@@ -264,7 +280,7 @@ public String name() {
264280
* <p>To determine whether a base model supports language model customization, use the [Get a
265281
* model](#getmodel) method and check that the attribute `custom_language_model` is set to `true`.
266282
* You can also refer to [Language support for
267-
* customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-custom-support#custom-language-support).
283+
* customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-custom-support).
268284
*
269285
* @return the baseModelName
270286
*/
@@ -275,20 +291,19 @@ public String baseModelName() {
275291
/**
276292
* Gets the dialect.
277293
*
278-
* <p>The dialect of the specified language that is to be used with the custom language model. For
279-
* most languages, the dialect matches the language of the base model by default. For example,
280-
* `en-US` is used for the US English language models. All dialect values are case-insensitive.
294+
* <p>The dialect of the specified language that is to be used with the custom language model.
295+
* _For all languages, it is always safe to omit this field._ The service automatically uses the
296+
* language identifier from the name of the base model. For example, the service automatically
297+
* uses `en-US` for all US English models.
281298
*
282-
* <p>The parameter is meaningful only for Spanish language models, for which you can always
283-
* safely omit the parameter to have the service create the correct mapping. For Spanish, the
284-
* service creates a custom language model that is suited for speech in one of the following
285-
* dialects: * `es-ES` for Castilian Spanish (`es-ES` models) * `es-LA` for Latin American Spanish
286-
* (`es-AR`, `es-CL`, `es-CO`, and `es-PE` models) * `es-US` for Mexican (North American) Spanish
287-
* (`es-MX` models)
299+
* <p>If you specify the `dialect` for a new custom model, follow these guidelines. _For
300+
* non-Spanish previous-generation models and for next-generation models,_ you must specify a
301+
* value that matches the five-character language identifier from the name of the base model. _For
302+
* Spanish previous-generation models,_ you must specify one of the following values: * `es-ES`
303+
* for Castilian Spanish (`es-ES` models) * `es-LA` for Latin American Spanish (`es-AR`, `es-CL`,
304+
* `es-CO`, and `es-PE` models) * `es-US` for Mexican (North American) Spanish (`es-MX` models)
288305
*
289-
* <p>If you specify the `dialect` parameter for a non-Spanish language model, its value must
290-
* match the language of the base model. If you specify the `dialect` for a Spanish language
291-
* model, its value must match one of the defined mappings (`es-ES`, `es-LA`, or `es-MX`).
306+
* <p>All values that you pass for the `dialect` field are case-insensitive.
292307
*
293308
* @return the dialect
294309
*/

0 commit comments

Comments
 (0)