Skip to content

Commit bfd4b0d

Browse files
feat(wss): add and remove websocket params
1 parent 995e687 commit bfd4b0d

File tree

4 files changed

+145
-33
lines changed

4 files changed

+145
-33
lines changed

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/SpeechToText.java

-3
Original file line numberDiff line numberDiff line change
@@ -506,9 +506,6 @@ public WebSocket recognizeUsingWebSocket(
506506
if (recognizeOptions.model() != null) {
507507
urlBuilder.addQueryParameter("model", recognizeOptions.model());
508508
}
509-
if (recognizeOptions.customizationId() != null) {
510-
urlBuilder.addQueryParameter("customization_id", recognizeOptions.customizationId());
511-
}
512509
if (recognizeOptions.languageCustomizationId() != null) {
513510
urlBuilder.addQueryParameter(
514511
"language_customization_id", recognizeOptions.languageCustomizationId());

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/RecognizeWithWebsocketsOptions.java

+143-28
Original file line numberDiff line numberDiff line change
@@ -31,24 +31,50 @@ public class RecognizeWithWebsocketsOptions extends GenericModel {
3131
public interface Model {
3232
/** ar-AR_BroadbandModel. */
3333
String AR_AR_BROADBANDMODEL = "ar-AR_BroadbandModel";
34+
/** ar-MS_BroadbandModel. */
35+
String AR_MS_BROADBANDMODEL = "ar-MS_BroadbandModel";
36+
/** ar-MS_Telephony. */
37+
String AR_MS_TELEPHONY = "ar-MS_Telephony";
38+
/** cs-CZ_Telephony. */
39+
String CS_CZ_TELEPHONY = "cs-CZ_Telephony";
3440
/** de-DE_BroadbandModel. */
3541
String DE_DE_BROADBANDMODEL = "de-DE_BroadbandModel";
42+
/** de-DE_Multimedia. */
43+
String DE_DE_MULTIMEDIA = "de-DE_Multimedia";
3644
/** de-DE_NarrowbandModel. */
3745
String DE_DE_NARROWBANDMODEL = "de-DE_NarrowbandModel";
46+
/** de-DE_Telephony. */
47+
String DE_DE_TELEPHONY = "de-DE_Telephony";
3848
/** en-AU_BroadbandModel. */
3949
String EN_AU_BROADBANDMODEL = "en-AU_BroadbandModel";
50+
/** en-AU_Multimedia. */
51+
String EN_AU_MULTIMEDIA = "en-AU_Multimedia";
4052
/** en-AU_NarrowbandModel. */
4153
String EN_AU_NARROWBANDMODEL = "en-AU_NarrowbandModel";
54+
/** en-AU_Telephony. */
55+
String EN_AU_TELEPHONY = "en-AU_Telephony";
56+
/** en-IN_Telephony. */
57+
String EN_IN_TELEPHONY = "en-IN_Telephony";
4258
/** en-GB_BroadbandModel. */
4359
String EN_GB_BROADBANDMODEL = "en-GB_BroadbandModel";
60+
/** en-GB_Multimedia. */
61+
String EN_GB_MULTIMEDIA = "en-GB_Multimedia";
4462
/** en-GB_NarrowbandModel. */
4563
String EN_GB_NARROWBANDMODEL = "en-GB_NarrowbandModel";
64+
/** en-GB_Telephony. */
65+
String EN_GB_TELEPHONY = "en-GB_Telephony";
4666
/** en-US_BroadbandModel. */
4767
String EN_US_BROADBANDMODEL = "en-US_BroadbandModel";
68+
/** en-US_Multimedia. */
69+
String EN_US_MULTIMEDIA = "en-US_Multimedia";
4870
/** en-US_NarrowbandModel. */
4971
String EN_US_NARROWBANDMODEL = "en-US_NarrowbandModel";
5072
/** en-US_ShortForm_NarrowbandModel. */
5173
String EN_US_SHORTFORM_NARROWBANDMODEL = "en-US_ShortForm_NarrowbandModel";
74+
/** en-US_Telephony. */
75+
String EN_US_TELEPHONY = "en-US_Telephony";
76+
/** en-WW_Medical_Telephony. */
77+
String EN_WW_MEDICAL_TELEPHONY = "en-WW_Medical_Telephony";
5278
/** es-AR_BroadbandModel. */
5379
String ES_AR_BROADBANDMODEL = "es-AR_BroadbandModel";
5480
/** es-AR_NarrowbandModel. */
@@ -65,6 +91,12 @@ public interface Model {
6591
String ES_ES_BROADBANDMODEL = "es-ES_BroadbandModel";
6692
/** es-ES_NarrowbandModel. */
6793
String ES_ES_NARROWBANDMODEL = "es-ES_NarrowbandModel";
94+
/** es-ES_Multimedia. */
95+
String ES_ES_MULTIMEDIA = "es-ES_Multimedia";
96+
/** es-ES_Telephony. */
97+
String ES_ES_TELEPHONY = "es-ES_Telephony";
98+
/** es-LA_Telephony. */
99+
String ES_LA_TELEPHONY = "es-LA_Telephony";
68100
/** es-MX_BroadbandModel. */
69101
String ES_MX_BROADBANDMODEL = "es-MX_BroadbandModel";
70102
/** es-MX_NarrowbandModel. */
@@ -77,34 +109,62 @@ public interface Model {
77109
String FR_CA_BROADBANDMODEL = "fr-CA_BroadbandModel";
78110
/** fr-CA_NarrowbandModel. */
79111
String FR_CA_NARROWBANDMODEL = "fr-CA_NarrowbandModel";
112+
/** fr-CA_Telephony. */
113+
String FR_CA_TELEPHONY = "fr-CA_Telephony";
80114
/** fr-FR_BroadbandModel. */
81115
String FR_FR_BROADBANDMODEL = "fr-FR_BroadbandModel";
116+
/** fr-FR_Multimedia. */
117+
String FR_FR_MULTIMEDIA = "fr-FR_Multimedia";
82118
/** fr-FR_NarrowbandModel. */
83119
String FR_FR_NARROWBANDMODEL = "fr-FR_NarrowbandModel";
120+
/** fr-FR_Telephony. */
121+
String FR_FR_TELEPHONY = "fr-FR_Telephony";
122+
/** hi-IN_Telephony. */
123+
String HI_IN_TELEPHONY = "hi-IN_Telephony";
84124
/** it-IT_BroadbandModel. */
85125
String IT_IT_BROADBANDMODEL = "it-IT_BroadbandModel";
86126
/** it-IT_NarrowbandModel. */
87127
String IT_IT_NARROWBANDMODEL = "it-IT_NarrowbandModel";
128+
/** it-IT_Multimedia. */
129+
String IT_IT_MULTIMEDIA = "it-IT_Multimedia";
130+
/** it-IT_Telephony. */
131+
String IT_IT_TELEPHONY = "it-IT_Telephony";
88132
/** ja-JP_BroadbandModel. */
89133
String JA_JP_BROADBANDMODEL = "ja-JP_BroadbandModel";
134+
/** ja-JP_Multimedia. */
135+
String JA_JP_MULTIMEDIA = "ja-JP_Multimedia";
90136
/** ja-JP_NarrowbandModel. */
91137
String JA_JP_NARROWBANDMODEL = "ja-JP_NarrowbandModel";
92138
/** ko-KR_BroadbandModel. */
93139
String KO_KR_BROADBANDMODEL = "ko-KR_BroadbandModel";
140+
/** ko-KR_Multimedia. */
141+
String KO_KR_MULTIMEDIA = "ko-KR_Multimedia";
94142
/** ko-KR_NarrowbandModel. */
95143
String KO_KR_NARROWBANDMODEL = "ko-KR_NarrowbandModel";
144+
/** ko-KR_Telephony. */
145+
String KO_KR_TELEPHONY = "ko-KR_Telephony";
146+
/** nl-BE_Telephony. */
147+
String NL_BE_TELEPHONY = "nl-BE_Telephony";
96148
/** nl-NL_BroadbandModel. */
97149
String NL_NL_BROADBANDMODEL = "nl-NL_BroadbandModel";
98150
/** nl-NL_NarrowbandModel. */
99151
String NL_NL_NARROWBANDMODEL = "nl-NL_NarrowbandModel";
152+
/** nl-NL_Telephony. */
153+
String NL_NL_TELEPHONY = "nl-NL_Telephony";
100154
/** pt-BR_BroadbandModel. */
101155
String PT_BR_BROADBANDMODEL = "pt-BR_BroadbandModel";
156+
/** pt-BR_Multimedia. */
157+
String PT_BR_MULTIMEDIA = "pt-BR_Multimedia";
102158
/** pt-BR_NarrowbandModel. */
103159
String PT_BR_NARROWBANDMODEL = "pt-BR_NarrowbandModel";
160+
/** pt-BR_Telephony. */
161+
String PT_BR_TELEPHONY = "pt-BR_Telephony";
104162
/** zh-CN_BroadbandModel. */
105163
String ZH_CN_BROADBANDMODEL = "zh-CN_BroadbandModel";
106164
/** zh-CN_NarrowbandModel. */
107165
String ZH_CN_NARROWBANDMODEL = "zh-CN_NarrowbandModel";
166+
/** zh-CN_Telephony. */
167+
String ZH_CN_TELEPHONY = "zh-CN_Telephony";
108168
}
109169

110170
protected transient InputStream audio;
@@ -127,14 +187,15 @@ public interface Model {
127187
protected Boolean profanityFilter;
128188
protected Boolean smartFormatting;
129189
protected Boolean speakerLabels;
130-
protected String customizationId;
131190
protected String grammarName;
132191
protected Boolean redaction;
133192
protected Boolean audioMetrics;
134193
protected Double endOfPhraseSilenceTime;
135194
protected Boolean splitTranscriptAtPhraseEnd;
136195
protected Float speechDetectorSensitivity;
137196
protected Float backgroundAudioSuppression;
197+
protected Boolean lowLatency;
198+
protected Float characterInsertionBias;
138199
private Boolean interimResults;
139200
private Boolean processingMetrics;
140201
private Float processingMetricsInterval;
@@ -158,14 +219,15 @@ public static class Builder {
158219
private Boolean profanityFilter;
159220
private Boolean smartFormatting;
160221
private Boolean speakerLabels;
161-
private String customizationId;
162222
private String grammarName;
163223
private Boolean redaction;
164224
private Boolean audioMetrics;
165225
private Double endOfPhraseSilenceTime;
166226
private Boolean splitTranscriptAtPhraseEnd;
167227
private Float speechDetectorSensitivity;
168228
private Float backgroundAudioSuppression;
229+
private Boolean lowLatency;
230+
private Float characterInsertionBias;
169231
private Boolean interimResults;
170232
private Boolean processingMetrics;
171233
private Float processingMetricsInterval;
@@ -188,14 +250,15 @@ private Builder(RecognizeWithWebsocketsOptions recognizeWithWebsocketsOptions) {
188250
this.profanityFilter = recognizeWithWebsocketsOptions.profanityFilter;
189251
this.smartFormatting = recognizeWithWebsocketsOptions.smartFormatting;
190252
this.speakerLabels = recognizeWithWebsocketsOptions.speakerLabels;
191-
this.customizationId = recognizeWithWebsocketsOptions.customizationId;
192253
this.grammarName = recognizeWithWebsocketsOptions.grammarName;
193254
this.redaction = recognizeWithWebsocketsOptions.redaction;
194255
this.audioMetrics = recognizeWithWebsocketsOptions.audioMetrics;
195256
this.endOfPhraseSilenceTime = recognizeWithWebsocketsOptions.endOfPhraseSilenceTime;
196257
this.splitTranscriptAtPhraseEnd = recognizeWithWebsocketsOptions.splitTranscriptAtPhraseEnd;
197258
this.speechDetectorSensitivity = recognizeWithWebsocketsOptions.speechDetectorSensitivity;
198259
this.backgroundAudioSuppression = recognizeWithWebsocketsOptions.backgroundAudioSuppression;
260+
this.lowLatency = recognizeWithWebsocketsOptions.lowLatency;
261+
this.characterInsertionBias = recognizeWithWebsocketsOptions.characterInsertionBias;
199262
this.interimResults = recognizeWithWebsocketsOptions.interimResults;
200263
this.processingMetrics = recognizeWithWebsocketsOptions.processingMetrics;
201264
this.processingMetricsInterval = recognizeWithWebsocketsOptions.processingMetricsInterval;
@@ -424,17 +487,6 @@ public Builder speakerLabels(Boolean speakerLabels) {
424487
return this;
425488
}
426489

427-
/**
428-
* Set the customizationId.
429-
*
430-
* @param customizationId the customizationId
431-
* @return the RecognizeOptions builder
432-
*/
433-
public Builder customizationId(String customizationId) {
434-
this.customizationId = customizationId;
435-
return this;
436-
}
437-
438490
/**
439491
* Set the grammarName.
440492
*
@@ -512,6 +564,28 @@ public Builder backgroundAudioSuppression(Float backgroundAudioSuppression) {
512564
return this;
513565
}
514566

567+
/**
568+
* Set the lowLatency.
569+
*
570+
* @param lowLatency the lowLatency
571+
* @return the RecognizeWithWebsocketsOptions builder
572+
*/
573+
public Builder lowLatency(Boolean lowLatency) {
574+
this.lowLatency = lowLatency;
575+
return this;
576+
}
577+
578+
/**
579+
* Set the characterInsertionBias.
580+
*
581+
* @param characterInsertionBias the characterInsertionBias
582+
* @return the RecognizeWithWebsocketsOptions builder
583+
*/
584+
public Builder characterInsertionBias(Float characterInsertionBias) {
585+
this.characterInsertionBias = characterInsertionBias;
586+
return this;
587+
}
588+
515589
/**
516590
* Set the audio.
517591
*
@@ -583,14 +657,15 @@ protected RecognizeWithWebsocketsOptions(Builder builder) {
583657
profanityFilter = builder.profanityFilter;
584658
smartFormatting = builder.smartFormatting;
585659
speakerLabels = builder.speakerLabels;
586-
customizationId = builder.customizationId;
587660
grammarName = builder.grammarName;
588661
redaction = builder.redaction;
589662
audioMetrics = builder.audioMetrics;
590663
endOfPhraseSilenceTime = builder.endOfPhraseSilenceTime;
591664
splitTranscriptAtPhraseEnd = builder.splitTranscriptAtPhraseEnd;
592665
speechDetectorSensitivity = builder.speechDetectorSensitivity;
593666
backgroundAudioSuppression = builder.backgroundAudioSuppression;
667+
lowLatency = builder.lowLatency;
668+
characterInsertionBias = builder.characterInsertionBias;
594669
interimResults = builder.interimResults;
595670
processingMetrics = builder.processingMetrics;
596671
processingMetricsInterval = builder.processingMetricsInterval;
@@ -874,19 +949,6 @@ public Boolean speakerLabels() {
874949
return speakerLabels;
875950
}
876951

877-
/**
878-
* Gets the customizationId.
879-
*
880-
* <p>**Deprecated.** Use the `language_customization_id` parameter to specify the customization
881-
* ID (GUID) of a custom language model that is to be used with the recognition request. Do not
882-
* specify both parameters with a request.
883-
*
884-
* @return the customizationId
885-
*/
886-
public String customizationId() {
887-
return customizationId;
888-
}
889-
890952
/**
891953
* Gets the grammarName.
892954
*
@@ -1028,6 +1090,59 @@ public Float backgroundAudioSuppression() {
10281090
return backgroundAudioSuppression;
10291091
}
10301092

1093+
/**
1094+
* Gets the lowLatency.
1095+
*
1096+
* <p>If `true` for next-generation `Multimedia` and `Telephony` models that support low latency,
1097+
* directs the service to produce results even more quickly than it usually does. Next-generation
1098+
* models produce transcription results faster than previous-generation models. The `low_latency`
1099+
* parameter causes the models to produce results even more quickly, though the results might be
1100+
* less accurate when the parameter is used.
1101+
*
1102+
* <p>The parameter is not available for previous-generation `Broadband` and `Narrowband` models.
1103+
* It is available for most next-generation models. For a list of next-generation models that
1104+
* support low latency, see [Supported next-generation language
1105+
* models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng#models-ng-supported).
1106+
* For more information about the `low_latency` parameter, see [Low
1107+
* latency](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-interim#low-latency).
1108+
*
1109+
* @return the lowLatency
1110+
*/
1111+
public Boolean lowLatency() {
1112+
return lowLatency;
1113+
}
1114+
1115+
/**
1116+
* Gets the characterInsertionBias.
1117+
*
1118+
* <p>For next-generation `Multimedia` and `Telephony` models, an indication of whether the
1119+
* service is biased to recognize shorter or longer strings of characters when developing
1120+
* transcription hypotheses. By default, the service is optimized for each individual model to
1121+
* balance its recognition of strings of different lengths. The model-specific bias is equivalent
1122+
* to 0.0.
1123+
*
1124+
* <p>The value that you specify represents a change from a model's default bias. The allowable
1125+
* range of values is -1.0 to 1.0. Negative values bias the service to favor hypotheses with
1126+
* shorter strings of characters. Positive values bias the service to favor hypotheses with
1127+
* longer strings of characters.
1128+
*
1129+
* <p>As the value approaches -1.0 or 1.0, the impact of the parameter becomes more pronounced. To
1130+
* determine the most effective value for your scenario, start by setting the value of the
1131+
* parameter to a small increment, such as -0.1, -0.05, 0.05, or 0.1, and assess how the value
1132+
* impacts the transcription results. Then experiment with different values as necessary,
1133+
* adjusting the value by small increments.
1134+
*
1135+
* <p>The parameter is not available for previous-generation `Broadband` and `Narrowband` models.
1136+
*
1137+
* <p>See [Character insertion
1138+
* bias](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-parsing#insertion-bias).
1139+
*
1140+
* @return the characterInsertionBias
1141+
*/
1142+
public Float characterInsertionBias() {
1143+
return characterInsertionBias;
1144+
}
1145+
10311146
/**
10321147
* Gets the interimResults.
10331148
*

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/websocket/SpeechToTextWebSocketListener.java

-2
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@ public final class SpeechToTextWebSocketListener extends WebSocketListener {
5454
private static final String RESULTS = "results";
5555
private static final String SPEAKER_LABELS = "speaker_labels";
5656
private static final String AUDIO_METRICS = "audio_metrics";
57-
private static final String CUSTOMIZATION_ID = "customization_id";
5857
private static final String LANGUAGE_CUSTOMIZATION_ID = "language_customization_id";
5958
private static final String ACOUSTIC_CUSTOMIZATION_ID = "acoustic_customization_id";
6059
private static final String VERSION = "base_model_version";
@@ -236,7 +235,6 @@ private String buildStartMessage(RecognizeWithWebsocketsOptions options) {
236235
.create();
237236
JsonObject startMessage = new JsonParser().parse(gson.toJson(options)).getAsJsonObject();
238237
startMessage.remove(MODEL);
239-
startMessage.remove(CUSTOMIZATION_ID);
240238
startMessage.remove(LANGUAGE_CUSTOMIZATION_ID);
241239
startMessage.remove(ACOUSTIC_CUSTOMIZATION_ID);
242240
startMessage.remove(VERSION);

text-to-speech/src/main/java/com/ibm/watson/text_to_speech/v1/websocket/TextToSpeechWebSocketListener.java

+2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ public class TextToSpeechWebSocketListener extends WebSocketListener {
2424

2525
private static final String VOICE = "voice";
2626
private static final String CUSTOMIZATION_ID = "customization_id";
27+
private static final String SPELL_OUT_MODE = "spell_out_mode";
2728
private static final String ACTION = "action";
2829
private static final String START = "start";
2930
private static final String STOP = "stop";
@@ -157,6 +158,7 @@ private String buildStartMessage(SynthesizeOptions options) {
157158
// remove options that are already in query string
158159
startMessage.remove(VOICE);
159160
startMessage.remove(CUSTOMIZATION_ID);
161+
startMessage.remove(SPELL_OUT_MODE);
160162

161163
startMessage.addProperty(ACTION, START);
162164
return startMessage.toString();

0 commit comments

Comments
 (0)