@@ -77,7 +77,6 @@ struct whisper_params {
77
77
bool detect_language = false ;
78
78
bool diarize = false ;
79
79
bool tinydiarize = false ;
80
- bool split_on_word = false ;
81
80
bool no_fallback = false ;
82
81
bool output_txt = false ;
83
82
bool output_vtt = false ;
@@ -149,7 +148,6 @@ bool whisper_params_parse(int argc, const char ** argv, whisper_params & params)
149
148
else if (arg == " -tr" || arg == " --translate" ) { params.translate = true ; }
150
149
else if (arg == " -di" || arg == " --diarize" ) { params.diarize = true ; }
151
150
else if (arg == " -tdrz" || arg == " --tinydiarize" ) { params.tinydiarize = true ; }
152
- else if (arg == " -sow" || arg == " --split-on-word" ) { params.split_on_word = true ; }
153
151
else if (arg == " -nf" || arg == " --no-fallback" ) { params.no_fallback = true ; }
154
152
else if (arg == " -otxt" || arg == " --output-txt" ) { params.output_txt = true ; }
155
153
else if (arg == " -ovtt" || arg == " --output-vtt" ) { params.output_vtt = true ; }
@@ -197,7 +195,6 @@ void whisper_print_usage(int /*argc*/, const char ** argv, const whisper_params
197
195
fprintf (stderr, " -d N, --duration N [%-7d] duration of audio to process in milliseconds\n " , params.duration_ms );
198
196
fprintf (stderr, " -mc N, --max-context N [%-7d] maximum number of text context tokens to store\n " , params.max_context );
199
197
fprintf (stderr, " -ml N, --max-len N [%-7d] maximum segment length in characters\n " , params.max_len );
200
- fprintf (stderr, " -sow, --split-on-word [%-7s] split on word rather than on token\n " , params.split_on_word ? " true" : " false" );
201
198
fprintf (stderr, " -bo N, --best-of N [%-7d] number of best candidates to keep\n " , params.best_of );
202
199
fprintf (stderr, " -bs N, --beam-size N [%-7d] beam size for beam search\n " , params.beam_size );
203
200
fprintf (stderr, " -wt N, --word-thold N [%-7.2f] word timestamp probability threshold\n " , params.word_thold );
@@ -320,6 +317,10 @@ void whisper_print_segment_callback(struct whisper_context * ctx, struct whisper
320
317
321
318
322
319
if (params.print_colors ) {
320
+ std::string buffer;
321
+ float probability_sum = 0 ;
322
+ int count = 0 ;
323
+
323
324
for (int j = 0 ; j < whisper_full_n_tokens (ctx, i); ++j) {
324
325
if (params.print_special == false ) {
325
326
const whisper_token id = whisper_full_get_token_id (ctx, i, j);
@@ -328,26 +329,21 @@ void whisper_print_segment_callback(struct whisper_context * ctx, struct whisper
328
329
}
329
330
}
330
331
331
- const char * text = whisper_full_get_token_text (ctx, i, j);
332
- const float p = whisper_full_get_token_p (ctx, i, j);
333
- const int col = std::max (0 , std::min ((int ) k_colors.size () - 1 , (int ) (std::pow (p, 3 )*float (k_colors.size ()))));
334
- // if (utf_8::is_valid(text)) {
335
- // printf("%s%s%s", k_colors[col].c_str(), text, "\033[0m");
336
- // } else {
337
- printf (" %s[_%i_]%s" , k_colors[col].c_str (), whisper_full_get_token_id (ctx, i, j), " \033 [0m" );
338
- // }
332
+ buffer += whisper_full_get_token_text (ctx, i, j);
333
+ probability_sum += whisper_full_get_token_p (ctx, i, j);
334
+ count++;
335
+ const int col = std::max (0 , std::min ((int ) k_colors.size () - 1 , (int ) (std::pow (probability_sum/static_cast <float >(count), 3 )*float (k_colors.size ()))));
336
+
337
+ if (whisper_utf8_is_valid (buffer.c_str ())) {
338
+ printf (" %s%s%s" , k_colors[col].c_str (), buffer.c_str (), " \033 [0m" );
339
+ buffer.clear ();
340
+ probability_sum = 0 ;
341
+ count = 0 ;
342
+ }
339
343
}
340
344
} else {
341
345
const char * text = whisper_full_get_segment_text (ctx, i);
342
- for (auto &k : utf_8::merge_and_split (text)) {
343
- if (utf_8::is_valid (k)) {
344
- printf (" %s" , k.c_str ());
345
- } else {
346
- for (auto l : k) {
347
- printf (" [_%i_]" , l);
348
- }
349
- }
350
- }
346
+ printf (" %s" , text);
351
347
}
352
348
353
349
if (params.tinydiarize ) {
@@ -1016,7 +1012,6 @@ int run(int argc, const char ** argv) {
1016
1012
wparams.token_timestamps = params.output_wts || params.output_jsn_full || params.max_len > 0 ;
1017
1013
wparams.thold_pt = params.word_thold ;
1018
1014
wparams.max_len = params.output_wts && params.max_len == 0 ? 60 : params.max_len ;
1019
- wparams.split_on_word = params.split_on_word ;
1020
1015
1021
1016
wparams.speed_up = params.speed_up ;
1022
1017
wparams.debug_mode = params.debug_mode ;
0 commit comments