sandrohanea Sandro Hanea committed on
Commit
661a3a2
·
unverified ·
1 Parent(s): 1813d16

whisper : fixed Beam Search Strategy and exposed whisper_pcm_to_mel_phase_vocoder (#474)

Browse files
Files changed (2) hide show
  1. whisper.cpp +2 -2
  2. whisper.h +10 -0
whisper.cpp CHANGED
@@ -2905,7 +2905,7 @@ const char * whisper_print_system_info(void) {
2905
 
2906
  struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy) {
2907
  struct whisper_full_params result = {
2908
- /*.strategy =*/ WHISPER_SAMPLING_GREEDY,
2909
 
2910
  /*.n_threads =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
2911
  /*.n_max_text_ctx =*/ 16384,
@@ -3829,7 +3829,7 @@ int whisper_full(
3829
 
3830
  auto & cur = beam_candidates[cur_c++];
3831
 
3832
- while (beam_candidates[cur_c].sequence.sum_logprobs_all == cur.sequence.sum_logprobs_all && i > 0) {
3833
  ++cur_c;
3834
  }
3835
 
 
2905
 
2906
  struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy) {
2907
  struct whisper_full_params result = {
2908
+ /*.strategy =*/ strategy,
2909
 
2910
  /*.n_threads =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
2911
  /*.n_max_text_ctx =*/ 16384,
 
3829
 
3830
  auto & cur = beam_candidates[cur_c++];
3831
 
3832
+ while (beam_candidates.size() > cur_c && beam_candidates[cur_c].sequence.sum_logprobs_all == cur.sequence.sum_logprobs_all && i > 0) {
3833
  ++cur_c;
3834
  }
3835
 
whisper.h CHANGED
@@ -113,6 +113,16 @@ extern "C" {
113
  int n_samples,
114
  int n_threads);
115
 
 
 
 
 
 
 
 
 
 
 
116
  // This can be used to set a custom log mel spectrogram inside the provided whisper context.
117
  // Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
118
  // n_mel must be 80
 
113
  int n_samples,
114
  int n_threads);
115
 
116
+ // Convert RAW PCM audio to a log mel spectrogram, applying a Phase Vocoder to speed up the audio x2.
117
+ // The resulting spectrogram is stored inside the provided whisper context.
118
+ // Returns 0 on success
119
+ WHISPER_API int whisper_pcm_to_mel_phase_vocoder(
120
+ struct whisper_context* ctx,
121
+ const float* samples,
122
+ int n_samples,
123
+ int n_threads);
124
+
125
+
126
  // This can be used to set a custom log mel spectrogram inside the provided whisper context.
127
  // Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
128
  // n_mel must be 80