ggerganov committed on
Commit
49a405e
·
1 Parent(s): 6b712f6

Try to improve the sampling strategy a bit

Browse files

It still fails sometimes when it does not sample a timestamp token for
the entire segment. We now print a message in such cases.

Files changed (1) hide show
  1. whisper.cpp +4 -2
whisper.cpp CHANGED
@@ -2425,7 +2425,7 @@ int whisper_full(
2425
  whisper_token id = 0;
2426
  whisper_token tid = whisper_token_beg(ctx);
2427
 
2428
- id = whisper_sample_best(ctx, result_len == 0);
2429
  if (i > 0) {
2430
  tid = whisper_sample_timestamp(ctx);
2431
  }
@@ -2445,7 +2445,9 @@ int whisper_full(
2445
  // end of text token
2446
  if (id == whisper_token_eot(ctx)) {
2447
  if (result_len == 0) {
2448
- result_len = i + 1;
 
 
2449
  }
2450
  break;
2451
  }
 
2425
  whisper_token id = 0;
2426
  whisper_token tid = whisper_token_beg(ctx);
2427
 
2428
+ id = whisper_sample_best(ctx, result_len == 0 || i > 32);
2429
  if (i > 0) {
2430
  tid = whisper_sample_timestamp(ctx);
2431
  }
 
2445
  // end of text token
2446
  if (id == whisper_token_eot(ctx)) {
2447
  if (result_len == 0) {
2448
+ // TODO: figure out how to resolve this
2449
+ fprintf(stderr, "\n%s: failed to generate timestamp token - this should not happen\n\n", __func__);
2450
+ //result_len = i + 1;
2451
  }
2452
  break;
2453
  }