Spaces:

natasa365
/

whisper.cpp

Running

App Files Files Community

whisper.cpp / tests /test-vad-full.cpp

danbev

vad : add initial Voice Activity Detection (VAD) support (#3065)

a28f35e unverified 7 months ago

raw

history blame

1.82 kB

	#include "whisper.h"
	#include "common-whisper.h"

	#include <cstdio>
	#include <cfloat>
	#include <string>
	#include <cstring>

	#ifdef NDEBUG
	#undef NDEBUG
	#endif

	#include <cassert>

	int main() {
	std::string whisper_model_path = "../../models/ggml-base.en.bin";
	std::string vad_model_path = "../../models/for-tests-silero-v5.1.2-ggml.bin";
	std::string sample_path = "../../samples/jfk.wav";

	// Load the sample audio file
	std::vector<float> pcmf32;
	std::vector<std::vector<float>> pcmf32s;
	assert(read_audio_data(sample_path.c_str(), pcmf32, pcmf32s, false));

	struct whisper_context_params cparams = whisper_context_default_params();
	struct whisper_context * wctx = whisper_init_from_file_with_params(
	whisper_model_path.c_str(),
	cparams);

	struct whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_BEAM_SEARCH);
	wparams.vad = true;
	wparams.vad_model_path = vad_model_path.c_str();

	wparams.vad_params.threshold = 0.5f;
	wparams.vad_params.min_speech_duration_ms = 250;
	wparams.vad_params.min_silence_duration_ms = 100;
	wparams.vad_params.max_speech_duration_s = FLT_MAX;
	wparams.vad_params.speech_pad_ms = 30;

	assert(whisper_full_parallel(wctx, wparams, pcmf32.data(), pcmf32.size(), 1) == 0);

	const int n_segments = whisper_full_n_segments(wctx);
	assert(n_segments == 1);

	assert(strcmp(" And so my fellow Americans, ask not what your country can do for you,"
	" ask what you can do for your country.",
	whisper_full_get_segment_text(wctx, 0)) == 0);
	assert(whisper_full_get_segment_t0(wctx, 0) == 29);
	assert(whisper_full_get_segment_t1(wctx, 0) == 1049);

	whisper_free(wctx);

	return 0;
	}