Skip to content

Commit a0d4348

Browse files
authored
Merge pull request ggml-org#5 from bobqianic/push
Push
2 parents e2e5177 + 7a5a2e9 commit a0d4348

File tree

6 files changed

+172
-143
lines changed

6 files changed

+172
-143
lines changed

examples/common.cpp

-50
Original file line numberDiff line numberDiff line change
@@ -616,56 +616,6 @@ gpt_vocab::id gpt_sample_top_k_top_p_repeat(
616616

617617
}
618618

619-
620-
namespace utf_8 {
621-
bool is_valid(const std::string &str) {
622-
uint64_t count = 0; // Count of bytes in the current UTF-8 character
623-
624-
for (unsigned char c : str) {
625-
if (count == 0) {
626-
if ((c >> 5) == 0b110) count = 1; // 2-byte character
627-
else if ((c >> 4) == 0b1110) count = 2; // 3-byte character
628-
else if ((c >> 3) == 0b11110) count = 3; // 4-byte character
629-
else if ((c >> 7) == 0b0) count = 0; // 1-byte character
630-
else return false; // Invalid UTF-8
631-
} else {
632-
if ((c >> 6) != 0b10) return false; // Subsequent bytes should start with 10
633-
count--;
634-
}
635-
}
636-
637-
return count == 0; // Ensure all UTF-8 characters are complete
638-
}
639-
640-
std::vector<std::string> merge_and_split(const std::string &str) {
641-
std::vector<std::string> result;
642-
std::string buffer;
643-
uint64_t count = 0; // Count of bytes in the current UTF-8 character
644-
645-
for (unsigned char c : str) {
646-
if (count == 0) {
647-
header:
648-
if ((c >> 5) == 0b110) count = 1; // 2-byte character
649-
else if ((c >> 4) == 0b1110) count = 2; // 3-byte character
650-
else if ((c >> 3) == 0b11110) count = 3; // 4-byte character
651-
else count = 0; // Invalid UTF-8 || 1-byte character
652-
if (!buffer.empty()) result.push_back(buffer);
653-
buffer.clear();
654-
buffer += static_cast<char>(c);
655-
} else {
656-
if ((c >> 6) != 0b10) {
657-
goto header;
658-
} // Subsequent bytes should start with 10
659-
buffer += static_cast<char>(c);
660-
count--;
661-
}
662-
}
663-
664-
if (!buffer.empty()) result.push_back(buffer);
665-
return result;
666-
}
667-
}
668-
669619
bool is_wav_buffer(const std::string buf) {
670620
// RIFF ref: https://en.wikipedia.org/wiki/Resource_Interchange_File_Format
671621
// WAV ref: https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html

examples/common.h

-5
Original file line numberDiff line numberDiff line change
@@ -131,11 +131,6 @@ gpt_vocab::id gpt_sample_top_k_top_p_repeat(
131131
float repeat_penalty,
132132
std::mt19937 & rng);
133133

134-
namespace utf_8{
135-
bool is_valid(const std::string &str);
136-
std::vector<std::string> merge_and_split(const std::string &str);
137-
}
138-
139134
//
140135
// Audio utils
141136
//

examples/main/main.cpp

+16-21
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ struct whisper_params {
7777
bool detect_language = false;
7878
bool diarize = false;
7979
bool tinydiarize = false;
80-
bool split_on_word = false;
8180
bool no_fallback = false;
8281
bool output_txt = false;
8382
bool output_vtt = false;
@@ -149,7 +148,6 @@ bool whisper_params_parse(int argc, const char ** argv, whisper_params & params)
149148
else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
150149
else if (arg == "-di" || arg == "--diarize") { params.diarize = true; }
151150
else if (arg == "-tdrz" || arg == "--tinydiarize") { params.tinydiarize = true; }
152-
else if (arg == "-sow" || arg == "--split-on-word") { params.split_on_word = true; }
153151
else if (arg == "-nf" || arg == "--no-fallback") { params.no_fallback = true; }
154152
else if (arg == "-otxt" || arg == "--output-txt") { params.output_txt = true; }
155153
else if (arg == "-ovtt" || arg == "--output-vtt") { params.output_vtt = true; }
@@ -197,7 +195,6 @@ void whisper_print_usage(int /*argc*/, const char ** argv, const whisper_params
197195
fprintf(stderr, " -d N, --duration N [%-7d] duration of audio to process in milliseconds\n", params.duration_ms);
198196
fprintf(stderr, " -mc N, --max-context N [%-7d] maximum number of text context tokens to store\n", params.max_context);
199197
fprintf(stderr, " -ml N, --max-len N [%-7d] maximum segment length in characters\n", params.max_len);
200-
fprintf(stderr, " -sow, --split-on-word [%-7s] split on word rather than on token\n", params.split_on_word ? "true" : "false");
201198
fprintf(stderr, " -bo N, --best-of N [%-7d] number of best candidates to keep\n", params.best_of);
202199
fprintf(stderr, " -bs N, --beam-size N [%-7d] beam size for beam search\n", params.beam_size);
203200
fprintf(stderr, " -wt N, --word-thold N [%-7.2f] word timestamp probability threshold\n", params.word_thold);
@@ -320,6 +317,10 @@ void whisper_print_segment_callback(struct whisper_context * ctx, struct whisper
320317

321318

322319
if (params.print_colors) {
320+
std::string buffer;
321+
float probability_sum = 0;
322+
int count = 0;
323+
323324
for (int j = 0; j < whisper_full_n_tokens(ctx, i); ++j) {
324325
if (params.print_special == false) {
325326
const whisper_token id = whisper_full_get_token_id(ctx, i, j);
@@ -328,26 +329,21 @@ void whisper_print_segment_callback(struct whisper_context * ctx, struct whisper
328329
}
329330
}
330331

331-
const char * text = whisper_full_get_token_text(ctx, i, j);
332-
const float p = whisper_full_get_token_p (ctx, i, j);
333-
const int col = std::max(0, std::min((int) k_colors.size() - 1, (int) (std::pow(p, 3)*float(k_colors.size()))));
334-
// if (utf_8::is_valid(text)) {
335-
// printf("%s%s%s", k_colors[col].c_str(), text, "\033[0m");
336-
// } else {
337-
printf("%s[_%i_]%s", k_colors[col].c_str(), whisper_full_get_token_id(ctx, i, j), "\033[0m");
338-
// }
332+
buffer += whisper_full_get_token_text(ctx, i, j);
333+
probability_sum += whisper_full_get_token_p (ctx, i, j);
334+
count++;
335+
const int col = std::max(0, std::min((int) k_colors.size() - 1, (int) (std::pow(probability_sum/static_cast<float>(count), 3)*float(k_colors.size()))));
336+
337+
if (whisper_utf8_is_valid(buffer.c_str())) {
338+
printf("%s%s%s", k_colors[col].c_str(), buffer.c_str(), "\033[0m");
339+
buffer.clear();
340+
probability_sum = 0;
341+
count = 0;
342+
}
339343
}
340344
} else {
341345
const char * text = whisper_full_get_segment_text(ctx, i);
342-
for (auto &k : utf_8::merge_and_split(text)) {
343-
if (utf_8::is_valid(k)) {
344-
printf("%s", k.c_str());
345-
} else {
346-
for (auto l : k) {
347-
printf("[_%i_]", l);
348-
}
349-
}
350-
}
346+
printf("%s", text);
351347
}
352348

353349
if (params.tinydiarize) {
@@ -1016,7 +1012,6 @@ int run(int argc, const char ** argv) {
10161012
wparams.token_timestamps = params.output_wts || params.output_jsn_full || params.max_len > 0;
10171013
wparams.thold_pt = params.word_thold;
10181014
wparams.max_len = params.output_wts && params.max_len == 0 ? 60 : params.max_len;
1019-
wparams.split_on_word = params.split_on_word;
10201015

10211016
wparams.speed_up = params.speed_up;
10221017
wparams.debug_mode = params.debug_mode;

examples/server/server.cpp

-8
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,6 @@ struct whisper_params {
7373
bool detect_language = false;
7474
bool diarize = false;
7575
bool tinydiarize = false;
76-
bool split_on_word = false;
7776
bool no_fallback = false;
7877
bool print_special = false;
7978
bool print_colors = false;
@@ -136,7 +135,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
136135
fprintf(stderr, " -d N, --duration N [%-7d] duration of audio to process in milliseconds\n", params.duration_ms);
137136
fprintf(stderr, " -mc N, --max-context N [%-7d] maximum number of text context tokens to store\n", params.max_context);
138137
fprintf(stderr, " -ml N, --max-len N [%-7d] maximum segment length in characters\n", params.max_len);
139-
fprintf(stderr, " -sow, --split-on-word [%-7s] split on word rather than on token\n", params.split_on_word ? "true" : "false");
140138
fprintf(stderr, " -bo N, --best-of N [%-7d] number of best candidates to keep\n", params.best_of);
141139
fprintf(stderr, " -bs N, --beam-size N [%-7d] beam size for beam search\n", params.beam_size);
142140
fprintf(stderr, " -wt N, --word-thold N [%-7.2f] word timestamp probability threshold\n", params.word_thold);
@@ -192,7 +190,6 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params, serve
192190
else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
193191
else if (arg == "-di" || arg == "--diarize") { params.diarize = true; }
194192
else if (arg == "-tdrz" || arg == "--tinydiarize") { params.tinydiarize = true; }
195-
else if (arg == "-sow" || arg == "--split-on-word") { params.split_on_word = true; }
196193
else if (arg == "-nf" || arg == "--no-fallback") { params.no_fallback = true; }
197194
else if (arg == "-fp" || arg == "--font-path") { params.font_path = argv[++i]; }
198195
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
@@ -462,10 +459,6 @@ void get_req_parameters(const Request & req, whisper_params & params)
462459
{
463460
params.tinydiarize = parse_str_to_bool(req.get_file_value("tinydiarize").content);
464461
}
465-
if (req.has_file("split_on_word"))
466-
{
467-
params.split_on_word = parse_str_to_bool(req.get_file_value("split_on_word").content);
468-
}
469462
if (req.has_file("no_timestamps"))
470463
{
471464
params.no_timestamps = parse_str_to_bool(req.get_file_value("no_timestamps").content);
@@ -738,7 +731,6 @@ int main(int argc, char ** argv) {
738731

739732
wparams.thold_pt = params.word_thold;
740733
wparams.max_len = params.max_len == 0 ? 60 : params.max_len;
741-
wparams.split_on_word = params.split_on_word;
742734

743735
wparams.speed_up = params.speed_up;
744736
wparams.debug_mode = params.debug_mode;

0 commit comments

Comments
 (0)