ggerganov committed on
Commit
9a513f5
·
1 Parent(s): dabc473

Improve result printing

Browse files
Files changed (3) hide show
  1. main.cpp +4 -1
  2. whisper.cpp +37 -1
  3. whisper.h +2 -0
main.cpp CHANGED
@@ -181,6 +181,9 @@ int main(int argc, char ** argv) {
181
  {
182
  whisper_full_params wparams = whisper_full_default_params(WHISPER_DECODE_GREEDY);
183
 
 
 
 
184
  wparams.print_special_tokens = params.print_special_tokens;
185
 
186
  if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
@@ -189,7 +192,7 @@ int main(int argc, char ** argv) {
189
  }
190
 
191
  // print result;
192
- {
193
  printf("\n");
194
 
195
  const int n_segments = whisper_full_n_segments(ctx);
 
181
  {
182
  whisper_full_params wparams = whisper_full_default_params(WHISPER_DECODE_GREEDY);
183
 
184
+ wparams.print_realtime = true;
185
+ wparams.print_progress = false;
186
+ wparams.print_timestamps = !params.no_timestamps;
187
  wparams.print_special_tokens = params.print_special_tokens;
188
 
189
  if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
 
192
  }
193
 
194
  // print result;
195
+ if (!wparams.print_realtime) {
196
  printf("\n");
197
 
198
  const int n_segments = whisper_full_n_segments(ctx);
whisper.cpp CHANGED
@@ -1854,6 +1854,18 @@ whisper_vocab::id whisper_sample_timestamp(
1854
  return probs_id[0].second;
1855
  }
1856
 
 
 
 
 
 
 
 
 
 
 
 
 
1857
  // naive Discrete Fourier Transform
1858
  // input is real-valued
1859
  // output is complex-valued
@@ -2245,6 +2257,8 @@ struct whisper_full_params whisper_full_default_params(enum whisper_decode_strat
2245
  .translate = false,
2246
  .print_special_tokens = false,
2247
  .print_progress = true,
 
 
2248
 
2249
  .language = "en",
2250
 
@@ -2262,6 +2276,8 @@ struct whisper_full_params whisper_full_default_params(enum whisper_decode_strat
2262
  .translate = false,
2263
  .print_special_tokens = false,
2264
  .print_progress = true,
 
 
2265
 
2266
  .language = "en",
2267
 
@@ -2436,6 +2452,15 @@ int whisper_full(
2436
  if (result_cur[i].id > whisper_token_beg(ctx)) {
2437
  const auto t1 = result_cur[i].t;
2438
  if (!text.empty()) {
 
 
 
 
 
 
 
 
 
2439
  result_all.push_back({ t0, t1, text });
2440
  }
2441
  text = "";
@@ -2448,7 +2473,18 @@ int whisper_full(
2448
  }
2449
 
2450
  if (!text.empty()) {
2451
- result_all.push_back({ t0, seek + seek_delta, text });
 
 
 
 
 
 
 
 
 
 
 
2452
  }
2453
  }
2454
 
 
1854
  return probs_id[0].second;
1855
  }
1856
 
1857
// Format a timestamp as "MM:SS.mmm".
//
// `t` is treated as a count of 1/100-second ticks: dividing by 100 yields
// whole seconds. The sub-second remainder is therefore in centiseconds
// (0..99) and has to be scaled by 10 before being printed in the
// three-digit millisecond field; printing it unscaled would render 1.5 s
// as "00:01.050" instead of "00:01.500".
//
// Minutes are not wrapped into hours, so inputs of an hour or more simply
// produce a minutes field wider than two digits.
static std::string to_timestamp(int64_t t) {
    const int64_t total_sec = t / 100;
    const int64_t msec      = (t - total_sec * 100) * 10; // centiseconds -> milliseconds
    const int64_t min       = total_sec / 60;
    const int64_t sec       = total_sec - min * 60;

    char buf[32];
    snprintf(buf, sizeof(buf), "%02d:%02d.%03d",
             static_cast<int>(min), static_cast<int>(sec), static_cast<int>(msec));

    return std::string(buf);
}
1868
+
1869
  // naive Discrete Fourier Transform
1870
  // input is real-valued
1871
  // output is complex-valued
 
2257
  .translate = false,
2258
  .print_special_tokens = false,
2259
  .print_progress = true,
2260
+ .print_realtime = false,
2261
+ .print_timestamps = true,
2262
 
2263
  .language = "en",
2264
 
 
2276
  .translate = false,
2277
  .print_special_tokens = false,
2278
  .print_progress = true,
2279
+ .print_realtime = false,
2280
+ .print_timestamps = true,
2281
 
2282
  .language = "en",
2283
 
 
2452
  if (result_cur[i].id > whisper_token_beg(ctx)) {
2453
  const auto t1 = result_cur[i].t;
2454
  if (!text.empty()) {
2455
+ if (params.print_realtime) {
2456
+ if (params.print_timestamps) {
2457
+ printf("[%s --> %s] %s\n", to_timestamp(t0).c_str(), to_timestamp(t1).c_str(), text.c_str());
2458
+ } else {
2459
+ printf("%s", text.c_str());
2460
+ fflush(stdout);
2461
+ }
2462
+ }
2463
+
2464
  result_all.push_back({ t0, t1, text });
2465
  }
2466
  text = "";
 
2473
  }
2474
 
2475
  if (!text.empty()) {
2476
+ const auto t1 = seek + seek_delta;
2477
+
2478
+ if (params.print_realtime) {
2479
+ if (params.print_timestamps) {
2480
+ printf("[%s --> %s] %s\n", to_timestamp(t0).c_str(), to_timestamp(t1).c_str(), text.c_str());
2481
+ } else {
2482
+ printf("%s", text.c_str());
2483
+ fflush(stdout);
2484
+ }
2485
+ }
2486
+
2487
+ result_all.push_back({ t0, t1, text });
2488
  }
2489
  }
2490
 
whisper.h CHANGED
@@ -106,6 +106,8 @@ extern "C" {
106
  bool translate;
107
  bool print_special_tokens;
108
  bool print_progress;
 
 
109
 
110
  const char * language;
111
 
 
106
  bool translate;
107
  bool print_special_tokens;
108
  bool print_progress;
109
+ bool print_realtime;
110
+ bool print_timestamps;
111
 
112
  const char * language;
113