Spaces:
Sleeping
Sleeping
sandrohanea
Sandro Hanea
committed on
whisper : fixed Beam Search Strategy and exposed whisper_pcm_to_mel_phase_vocoder (#474)
Browse files- whisper.cpp +2 -2
- whisper.h +10 -0
whisper.cpp
CHANGED
|
@@ -2905,7 +2905,7 @@ const char * whisper_print_system_info(void) {
|
|
| 2905 |
|
| 2906 |
struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy) {
|
| 2907 |
struct whisper_full_params result = {
|
| 2908 |
-
/*.strategy =*/
|
| 2909 |
|
| 2910 |
/*.n_threads =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
|
| 2911 |
/*.n_max_text_ctx =*/ 16384,
|
|
@@ -3829,7 +3829,7 @@ int whisper_full(
|
|
| 3829 |
|
| 3830 |
auto & cur = beam_candidates[cur_c++];
|
| 3831 |
|
| 3832 |
-
while (beam_candidates[cur_c].sequence.sum_logprobs_all == cur.sequence.sum_logprobs_all && i > 0) {
|
| 3833 |
++cur_c;
|
| 3834 |
}
|
| 3835 |
|
|
|
|
| 2905 |
|
| 2906 |
struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy) {
|
| 2907 |
struct whisper_full_params result = {
|
| 2908 |
+
/*.strategy =*/ strategy,
|
| 2909 |
|
| 2910 |
/*.n_threads =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
|
| 2911 |
/*.n_max_text_ctx =*/ 16384,
|
|
|
|
| 3829 |
|
| 3830 |
auto & cur = beam_candidates[cur_c++];
|
| 3831 |
|
| 3832 |
+
while (beam_candidates.size() > cur_c && beam_candidates[cur_c].sequence.sum_logprobs_all == cur.sequence.sum_logprobs_all && i > 0) {
|
| 3833 |
++cur_c;
|
| 3834 |
}
|
| 3835 |
|
whisper.h
CHANGED
|
@@ -113,6 +113,16 @@ extern "C" {
|
|
| 113 |
int n_samples,
|
| 114 |
int n_threads);
|
| 115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
// This can be used to set a custom log mel spectrogram inside the provided whisper context.
|
| 117 |
// Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
|
| 118 |
// n_mel must be 80
|
|
|
|
| 113 |
int n_samples,
|
| 114 |
int n_threads);
|
| 115 |
|
| 116 |
+
// Convert RAW PCM audio to a log mel spectrogram, applying a Phase Vocoder to speed up the audio x2.
|
| 117 |
+
// The resulting spectrogram is stored inside the provided whisper context.
|
| 118 |
+
// Returns 0 on success
|
| 119 |
+
WHISPER_API int whisper_pcm_to_mel_phase_vocoder(
|
| 120 |
+
struct whisper_context* ctx,
|
| 121 |
+
const float* samples,
|
| 122 |
+
int n_samples,
|
| 123 |
+
int n_threads);
|
| 124 |
+
|
| 125 |
+
|
| 126 |
// This can be used to set a custom log mel spectrogram inside the provided whisper context.
|
| 127 |
// Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
|
| 128 |
// n_mel must be 80
|