Spaces: Sleeping
whisper : add no_context parameter to whisper_params (#3045)
Browse files
examples/server/server.cpp
CHANGED
|
@@ -79,6 +79,7 @@ struct whisper_params {
|
|
| 79 |
bool use_gpu = true;
|
| 80 |
bool flash_attn = false;
|
| 81 |
bool suppress_nst = false;
|
|
|
|
| 82 |
|
| 83 |
std::string language = "en";
|
| 84 |
std::string prompt = "";
|
|
@@ -140,6 +141,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
|
| 140 |
fprintf(stderr, " --convert, [%-7s] Convert audio to WAV, requires ffmpeg on the server\n", sparams.ffmpeg_converter ? "true" : "false");
|
| 141 |
fprintf(stderr, " -sns, --suppress-nst [%-7s] suppress non-speech tokens\n", params.suppress_nst ? "true" : "false");
|
| 142 |
fprintf(stderr, " -nth N, --no-speech-thold N [%-7.2f] no speech threshold\n", params.no_speech_thold);
|
|
|
|
| 143 |
fprintf(stderr, "\n");
|
| 144 |
}
|
| 145 |
|
|
@@ -186,6 +188,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params, serve
|
|
| 186 |
else if (arg == "-fa" || arg == "--flash-attn") { params.flash_attn = true; }
|
| 187 |
else if (arg == "-sns" || arg == "--suppress-nst") { params.suppress_nst = true; }
|
| 188 |
else if (arg == "-nth" || arg == "--no-speech-thold") { params.no_speech_thold = std::stof(argv[++i]); }
|
|
|
|
| 189 |
|
| 190 |
// server params
|
| 191 |
else if ( arg == "--port") { sparams.port = std::stoi(argv[++i]); }
|
|
@@ -506,6 +509,10 @@ void get_req_parameters(const Request & req, whisper_params & params)
|
|
| 506 |
{
|
| 507 |
params.suppress_nst = parse_str_to_bool(req.get_file_value("suppress_nst").content);
|
| 508 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 509 |
}
|
| 510 |
|
| 511 |
} // namespace
|
|
@@ -818,6 +825,7 @@ int main(int argc, char ** argv) {
|
|
| 818 |
|
| 819 |
wparams.no_timestamps = params.no_timestamps;
|
| 820 |
wparams.token_timestamps = !params.no_timestamps && params.response_format == vjson_format;
|
|
|
|
| 821 |
|
| 822 |
wparams.suppress_nst = params.suppress_nst;
|
| 823 |
|
|
|
|
| 79 |
bool use_gpu = true;
|
| 80 |
bool flash_attn = false;
|
| 81 |
bool suppress_nst = false;
|
| 82 |
+
bool no_context = false;
|
| 83 |
|
| 84 |
std::string language = "en";
|
| 85 |
std::string prompt = "";
|
|
|
|
| 141 |
fprintf(stderr, " --convert, [%-7s] Convert audio to WAV, requires ffmpeg on the server\n", sparams.ffmpeg_converter ? "true" : "false");
|
| 142 |
fprintf(stderr, " -sns, --suppress-nst [%-7s] suppress non-speech tokens\n", params.suppress_nst ? "true" : "false");
|
| 143 |
fprintf(stderr, " -nth N, --no-speech-thold N [%-7.2f] no speech threshold\n", params.no_speech_thold);
|
| 144 |
+
fprintf(stderr, " -nc, --no-context [%-7s] do not use previous audio context\n", params.no_context ? "true" : "false");
|
| 145 |
fprintf(stderr, "\n");
|
| 146 |
}
|
| 147 |
|
|
|
|
| 188 |
else if (arg == "-fa" || arg == "--flash-attn") { params.flash_attn = true; }
|
| 189 |
else if (arg == "-sns" || arg == "--suppress-nst") { params.suppress_nst = true; }
|
| 190 |
else if (arg == "-nth" || arg == "--no-speech-thold") { params.no_speech_thold = std::stof(argv[++i]); }
|
| 191 |
+
else if (arg == "-nc" || arg == "--no-context") { params.no_context = true; }
|
| 192 |
|
| 193 |
// server params
|
| 194 |
else if ( arg == "--port") { sparams.port = std::stoi(argv[++i]); }
|
|
|
|
| 509 |
{
|
| 510 |
params.suppress_nst = parse_str_to_bool(req.get_file_value("suppress_nst").content);
|
| 511 |
}
|
| 512 |
+
if (req.has_file("no_context"))
|
| 513 |
+
{
|
| 514 |
+
params.no_context = parse_str_to_bool(req.get_file_value("no_context").content);
|
| 515 |
+
}
|
| 516 |
}
|
| 517 |
|
| 518 |
} // namespace
|
|
|
|
| 825 |
|
| 826 |
wparams.no_timestamps = params.no_timestamps;
|
| 827 |
wparams.token_timestamps = !params.no_timestamps && params.response_format == vjson_format;
|
| 828 |
+
wparams.no_context = params.no_context;
|
| 829 |
|
| 830 |
wparams.suppress_nst = params.suppress_nst;
|
| 831 |
|