Spaces:
Running
Running
Improve result printing
Browse files
- main.cpp +4 -1
- whisper.cpp +37 -1
- whisper.h +2 -0
main.cpp
CHANGED
|
@@ -181,6 +181,9 @@ int main(int argc, char ** argv) {
|
|
| 181 |
{
|
| 182 |
whisper_full_params wparams = whisper_full_default_params(WHISPER_DECODE_GREEDY);
|
| 183 |
|
|
|
|
|
|
|
|
|
|
| 184 |
wparams.print_special_tokens = params.print_special_tokens;
|
| 185 |
|
| 186 |
if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
|
|
@@ -189,7 +192,7 @@ int main(int argc, char ** argv) {
|
|
| 189 |
}
|
| 190 |
|
| 191 |
// print result;
|
| 192 |
-
{
|
| 193 |
printf("\n");
|
| 194 |
|
| 195 |
const int n_segments = whisper_full_n_segments(ctx);
|
|
|
|
| 181 |
{
|
| 182 |
whisper_full_params wparams = whisper_full_default_params(WHISPER_DECODE_GREEDY);
|
| 183 |
|
| 184 |
+
wparams.print_realtime = true;
|
| 185 |
+
wparams.print_progress = false;
|
| 186 |
+
wparams.print_timestamps = !params.no_timestamps;
|
| 187 |
wparams.print_special_tokens = params.print_special_tokens;
|
| 188 |
|
| 189 |
if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
|
|
|
|
| 192 |
}
|
| 193 |
|
| 194 |
// print result;
|
| 195 |
+
if (!wparams.print_realtime) {
|
| 196 |
printf("\n");
|
| 197 |
|
| 198 |
const int n_segments = whisper_full_n_segments(ctx);
|
whisper.cpp
CHANGED
|
@@ -1854,6 +1854,18 @@ whisper_vocab::id whisper_sample_timestamp(
|
|
| 1854 |
return probs_id[0].second;
|
| 1855 |
}
|
| 1856 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1857 |
// naive Discrete Fourier Transform
|
| 1858 |
// input is real-valued
|
| 1859 |
// output is complex-valued
|
|
@@ -2245,6 +2257,8 @@ struct whisper_full_params whisper_full_default_params(enum whisper_decode_strat
|
|
| 2245 |
.translate = false,
|
| 2246 |
.print_special_tokens = false,
|
| 2247 |
.print_progress = true,
|
|
|
|
|
|
|
| 2248 |
|
| 2249 |
.language = "en",
|
| 2250 |
|
|
@@ -2262,6 +2276,8 @@ struct whisper_full_params whisper_full_default_params(enum whisper_decode_strat
|
|
| 2262 |
.translate = false,
|
| 2263 |
.print_special_tokens = false,
|
| 2264 |
.print_progress = true,
|
|
|
|
|
|
|
| 2265 |
|
| 2266 |
.language = "en",
|
| 2267 |
|
|
@@ -2436,6 +2452,15 @@ int whisper_full(
|
|
| 2436 |
if (result_cur[i].id > whisper_token_beg(ctx)) {
|
| 2437 |
const auto t1 = result_cur[i].t;
|
| 2438 |
if (!text.empty()) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2439 |
result_all.push_back({ t0, t1, text });
|
| 2440 |
}
|
| 2441 |
text = "";
|
|
@@ -2448,7 +2473,18 @@ int whisper_full(
|
|
| 2448 |
}
|
| 2449 |
|
| 2450 |
if (!text.empty()) {
|
| 2451 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2452 |
}
|
| 2453 |
}
|
| 2454 |
|
|
|
|
| 1854 |
return probs_id[0].second;
|
| 1855 |
}
|
| 1856 |
|
| 1857 |
+
// Format a timestamp as "MM:SS.mmm".
//
// t is expressed in units of 10 ms (hundredths of a second), as implied by
// the t/100 conversion to whole seconds below. The sub-second remainder is
// therefore in the range 0..99 and must be scaled by 10 so that the
// three-digit field really holds milliseconds
// (e.g. t = 150 -> "00:01.500", not "00:01.050").
//
// Minutes are not folded into hours, so inputs of one hour or more are
// printed with a minute count above 59.
static std::string to_timestamp(int64_t t) {
    int64_t sec = t/100;
    const int64_t msec = (t - sec*100)*10; // hundredths of a second -> milliseconds
    const int64_t min = sec/60;
    sec = sec - min*60;

    char buf[32];
    snprintf(buf, sizeof(buf), "%02d:%02d.%03d", (int) min, (int) sec, (int) msec);

    return std::string(buf);
}
|
| 1868 |
+
|
| 1869 |
// naive Discrete Fourier Transform
|
| 1870 |
// input is real-valued
|
| 1871 |
// output is complex-valued
|
|
|
|
| 2257 |
.translate = false,
|
| 2258 |
.print_special_tokens = false,
|
| 2259 |
.print_progress = true,
|
| 2260 |
+
.print_realtime = false,
|
| 2261 |
+
.print_timestamps = true,
|
| 2262 |
|
| 2263 |
.language = "en",
|
| 2264 |
|
|
|
|
| 2276 |
.translate = false,
|
| 2277 |
.print_special_tokens = false,
|
| 2278 |
.print_progress = true,
|
| 2279 |
+
.print_realtime = false,
|
| 2280 |
+
.print_timestamps = true,
|
| 2281 |
|
| 2282 |
.language = "en",
|
| 2283 |
|
|
|
|
| 2452 |
if (result_cur[i].id > whisper_token_beg(ctx)) {
|
| 2453 |
const auto t1 = result_cur[i].t;
|
| 2454 |
if (!text.empty()) {
|
| 2455 |
+
if (params.print_realtime) {
|
| 2456 |
+
if (params.print_timestamps) {
|
| 2457 |
+
printf("[%s --> %s] %s\n", to_timestamp(t0).c_str(), to_timestamp(t1).c_str(), text.c_str());
|
| 2458 |
+
} else {
|
| 2459 |
+
printf("%s", text.c_str());
|
| 2460 |
+
fflush(stdout);
|
| 2461 |
+
}
|
| 2462 |
+
}
|
| 2463 |
+
|
| 2464 |
result_all.push_back({ t0, t1, text });
|
| 2465 |
}
|
| 2466 |
text = "";
|
|
|
|
| 2473 |
}
|
| 2474 |
|
| 2475 |
if (!text.empty()) {
|
| 2476 |
+
const auto t1 = seek + seek_delta;
|
| 2477 |
+
|
| 2478 |
+
if (params.print_realtime) {
|
| 2479 |
+
if (params.print_timestamps) {
|
| 2480 |
+
printf("[%s --> %s] %s\n", to_timestamp(t0).c_str(), to_timestamp(t1).c_str(), text.c_str());
|
| 2481 |
+
} else {
|
| 2482 |
+
printf("%s", text.c_str());
|
| 2483 |
+
fflush(stdout);
|
| 2484 |
+
}
|
| 2485 |
+
}
|
| 2486 |
+
|
| 2487 |
+
result_all.push_back({ t0, t1, text });
|
| 2488 |
}
|
| 2489 |
}
|
| 2490 |
|
whisper.h
CHANGED
|
@@ -106,6 +106,8 @@ extern "C" {
|
|
| 106 |
bool translate;
|
| 107 |
bool print_special_tokens;
|
| 108 |
bool print_progress;
|
|
|
|
|
|
|
| 109 |
|
| 110 |
const char * language;
|
| 111 |
|
|
|
|
| 106 |
bool translate;
|
| 107 |
bool print_special_tokens;
|
| 108 |
bool print_progress;
|
| 109 |
+
bool print_realtime;
|
| 110 |
+
bool print_timestamps;
|
| 111 |
|
| 112 |
const char * language;
|
| 113 |
|