Spaces:
Running
Running
Node.js package (#260)
Browse files
* npm : preparing infra for node package
* npm : package infra ready
* npm : initial version ready
* npm : change name to whisper.cpp
whisper.js is taken
- CMakeLists.txt +13 -11
- bindings/CMakeLists.txt +16 -0
- bindings/javascript/CMakeLists.txt +11 -4
- bindings/javascript/README.md +5 -0
- bindings/javascript/emscripten.cpp +31 -46
- bindings/javascript/libwhisper.worker.js +1 -0
- bindings/javascript/package-tmpl.json +26 -0
- bindings/javascript/package.json +26 -0
- bindings/javascript/whisper.js +0 -0
- examples/whisper.wasm/CMakeLists.txt +45 -3
- examples/whisper.wasm/emscripten.cpp +108 -0
- examples/whisper.wasm/index-tmpl.html +1 -1
- extra/deploy-wasm.sh +1 -1
- tests/CMakeLists.txt +10 -0
- tests/test-whisper.js +58 -0
CMakeLists.txt
CHANGED
|
@@ -14,6 +14,7 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
|
|
| 14 |
if (EXISTS "${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl")
|
| 15 |
configure_file(${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl ${CMAKE_SOURCE_DIR}/bindings/ios/Makefile @ONLY)
|
| 16 |
endif()
|
|
|
|
| 17 |
else()
|
| 18 |
set(WHISPER_STANDALONE OFF)
|
| 19 |
endif()
|
|
@@ -151,8 +152,7 @@ else()
|
|
| 151 |
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /arch:AVX2")
|
| 152 |
else()
|
| 153 |
if (EMSCRIPTEN)
|
| 154 |
-
|
| 155 |
-
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -msimd128")
|
| 156 |
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
| 157 |
else()
|
| 158 |
if(NOT WHISPER_NO_AVX)
|
|
@@ -203,6 +203,10 @@ if (BUILD_SHARED_LIBS)
|
|
| 203 |
)
|
| 204 |
endif()
|
| 205 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
target_compile_definitions(${TARGET} PUBLIC
|
| 207 |
${WHISPER_EXTRA_FLAGS}
|
| 208 |
)
|
|
@@ -222,13 +226,11 @@ add_subdirectory(bindings)
|
|
| 222 |
# programs, examples and tests
|
| 223 |
#
|
| 224 |
|
| 225 |
-
if (
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
add_subdirectory(tests)
|
| 229 |
-
endif ()
|
| 230 |
-
|
| 231 |
-
if (WHISPER_BUILD_EXAMPLES)
|
| 232 |
-
add_subdirectory(examples)
|
| 233 |
-
endif()
|
| 234 |
endif ()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
if (EXISTS "${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl")
|
| 15 |
configure_file(${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl ${CMAKE_SOURCE_DIR}/bindings/ios/Makefile @ONLY)
|
| 16 |
endif()
|
| 17 |
+
configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json ${CMAKE_SOURCE_DIR}/bindings/javascript/package.json @ONLY)
|
| 18 |
else()
|
| 19 |
set(WHISPER_STANDALONE OFF)
|
| 20 |
endif()
|
|
|
|
| 152 |
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /arch:AVX2")
|
| 153 |
else()
|
| 154 |
if (EMSCRIPTEN)
|
| 155 |
+
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
|
|
|
|
| 156 |
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
| 157 |
else()
|
| 158 |
if(NOT WHISPER_NO_AVX)
|
|
|
|
| 203 |
)
|
| 204 |
endif()
|
| 205 |
|
| 206 |
+
if (EMSCRIPTEN)
|
| 207 |
+
set_target_properties(${TARGET} PROPERTIES COMPILE_FLAGS "-msimd128")
|
| 208 |
+
endif()
|
| 209 |
+
|
| 210 |
target_compile_definitions(${TARGET} PUBLIC
|
| 211 |
${WHISPER_EXTRA_FLAGS}
|
| 212 |
)
|
|
|
|
| 226 |
# programs, examples and tests
|
| 227 |
#
|
| 228 |
|
| 229 |
+
if (WHISPER_BUILD_TESTS)
|
| 230 |
+
enable_testing()
|
| 231 |
+
add_subdirectory(tests)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
endif ()
|
| 233 |
+
|
| 234 |
+
if (WHISPER_BUILD_EXAMPLES)
|
| 235 |
+
add_subdirectory(examples)
|
| 236 |
+
endif()
|
bindings/CMakeLists.txt
CHANGED
|
@@ -1,3 +1,19 @@
|
|
| 1 |
if (EMSCRIPTEN)
|
| 2 |
add_subdirectory(javascript)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
endif()
|
|
|
|
| 1 |
if (EMSCRIPTEN)
|
| 2 |
add_subdirectory(javascript)
|
| 3 |
+
|
| 4 |
+
add_custom_command(
|
| 5 |
+
OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/javascript/publish.log
|
| 6 |
+
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/whisper.js
|
| 7 |
+
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/libwhisper.worker.js
|
| 8 |
+
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/package.json
|
| 9 |
+
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/javascript
|
| 10 |
+
COMMAND npm publish
|
| 11 |
+
COMMAND touch publish.log
|
| 12 |
+
COMMENT "Publishing npm module v${PROJECT_VERSION}"
|
| 13 |
+
VERBATIM
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
add_custom_target(publish-npm
|
| 17 |
+
DEPENDS javascript/publish.log
|
| 18 |
+
)
|
| 19 |
endif()
|
bindings/javascript/CMakeLists.txt
CHANGED
|
@@ -20,15 +20,22 @@ if (WHISPER_WASM_SINGLE_FILE)
|
|
| 20 |
${CMAKE_BINARY_DIR}/bin/libwhisper.js
|
| 21 |
${CMAKE_CURRENT_SOURCE_DIR}/whisper.js
|
| 22 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
endif()
|
| 24 |
|
| 25 |
set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
|
| 26 |
--bind \
|
|
|
|
|
|
|
|
|
|
| 27 |
-s USE_PTHREADS=1 \
|
| 28 |
-s PTHREAD_POOL_SIZE=8 \
|
| 29 |
-
-s
|
| 30 |
-
-s TOTAL_MEMORY=1610612736 \
|
| 31 |
-
-s FORCE_FILESYSTEM=1 \
|
| 32 |
-
-s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
|
| 33 |
${EXTRA_FLAGS} \
|
| 34 |
")
|
|
|
|
| 20 |
${CMAKE_BINARY_DIR}/bin/libwhisper.js
|
| 21 |
${CMAKE_CURRENT_SOURCE_DIR}/whisper.js
|
| 22 |
)
|
| 23 |
+
|
| 24 |
+
add_custom_command(
|
| 25 |
+
TARGET ${TARGET} POST_BUILD
|
| 26 |
+
COMMAND ${CMAKE_COMMAND} -E copy
|
| 27 |
+
${CMAKE_BINARY_DIR}/bin/libwhisper.worker.js
|
| 28 |
+
${CMAKE_CURRENT_SOURCE_DIR}/libwhisper.worker.js
|
| 29 |
+
)
|
| 30 |
endif()
|
| 31 |
|
| 32 |
set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
|
| 33 |
--bind \
|
| 34 |
+
-s MODULARIZE=1 \
|
| 35 |
+
-s EXPORT_NAME=\"'whisper_factory'\" \
|
| 36 |
+
-s FORCE_FILESYSTEM=1 \
|
| 37 |
-s USE_PTHREADS=1 \
|
| 38 |
-s PTHREAD_POOL_SIZE=8 \
|
| 39 |
+
-s ALLOW_MEMORY_GROWTH=1 \
|
|
|
|
|
|
|
|
|
|
| 40 |
${EXTRA_FLAGS} \
|
| 41 |
")
|
bindings/javascript/README.md
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# whisper.cpp
|
| 2 |
+
|
| 3 |
+
Node.js package for Whisper speech recognition
|
| 4 |
+
|
| 5 |
+
For sample usage check [tests/test-whisper.js](/tests/test-whisper.js)
|
bindings/javascript/emscripten.cpp
CHANGED
|
@@ -1,63 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
#include "whisper.h"
|
| 2 |
|
| 3 |
#include <emscripten.h>
|
| 4 |
#include <emscripten/bind.h>
|
| 5 |
|
| 6 |
-
#include <vector>
|
| 7 |
#include <thread>
|
|
|
|
| 8 |
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
std::vector<struct whisper_context *> g_contexts(4, nullptr);
|
| 12 |
|
| 13 |
EMSCRIPTEN_BINDINGS(whisper) {
|
| 14 |
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
| 15 |
-
if (
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
g_contexts[i] = whisper_init(path_model.c_str());
|
| 22 |
-
if (g_contexts[i] != nullptr) {
|
| 23 |
-
return i + 1;
|
| 24 |
-
} else {
|
| 25 |
-
return (size_t) 0;
|
| 26 |
-
}
|
| 27 |
}
|
| 28 |
}
|
| 29 |
|
| 30 |
-
return
|
| 31 |
}));
|
| 32 |
|
| 33 |
-
emscripten::function("free", emscripten::optional_override([](
|
| 34 |
-
if (
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
--index;
|
| 39 |
-
|
| 40 |
-
if (index < g_contexts.size()) {
|
| 41 |
-
whisper_free(g_contexts[index]);
|
| 42 |
-
g_contexts[index] = nullptr;
|
| 43 |
}
|
| 44 |
}));
|
| 45 |
|
| 46 |
-
emscripten::function("full_default", emscripten::optional_override([](
|
| 47 |
-
if (
|
| 48 |
-
g_worker.join();
|
| 49 |
-
}
|
| 50 |
-
|
| 51 |
-
--index;
|
| 52 |
-
|
| 53 |
-
if (index >= g_contexts.size()) {
|
| 54 |
return -1;
|
| 55 |
}
|
| 56 |
|
| 57 |
-
if (g_contexts[index] == nullptr) {
|
| 58 |
-
return -2;
|
| 59 |
-
}
|
| 60 |
-
|
| 61 |
struct whisper_full_params params = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
|
| 62 |
|
| 63 |
params.print_realtime = true;
|
|
@@ -65,7 +50,7 @@ EMSCRIPTEN_BINDINGS(whisper) {
|
|
| 65 |
params.print_timestamps = true;
|
| 66 |
params.print_special = false;
|
| 67 |
params.translate = translate;
|
| 68 |
-
params.language = whisper_is_multilingual(
|
| 69 |
params.n_threads = std::min(8, (int) std::thread::hardware_concurrency());
|
| 70 |
params.offset_ms = 0;
|
| 71 |
|
|
@@ -82,9 +67,11 @@ EMSCRIPTEN_BINDINGS(whisper) {
|
|
| 82 |
|
| 83 |
// print system information
|
| 84 |
{
|
|
|
|
| 85 |
printf("system_info: n_threads = %d / %d | %s\n",
|
| 86 |
params.n_threads, std::thread::hardware_concurrency(), whisper_print_system_info());
|
| 87 |
|
|
|
|
| 88 |
printf("%s: processing %d samples, %.1f sec, %d threads, %d processors, lang = %s, task = %s ...\n",
|
| 89 |
__func__, int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
|
| 90 |
params.n_threads, 1,
|
|
@@ -94,13 +81,11 @@ EMSCRIPTEN_BINDINGS(whisper) {
|
|
| 94 |
printf("\n");
|
| 95 |
}
|
| 96 |
|
| 97 |
-
// run
|
| 98 |
{
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
whisper_print_timings(g_contexts[index]);
|
| 103 |
-
});
|
| 104 |
}
|
| 105 |
|
| 106 |
return 0;
|
|
|
|
| 1 |
+
//
|
| 2 |
+
// This is the Javascript API of whisper.cpp
|
| 3 |
+
//
|
| 4 |
+
// Very crude at the moment.
|
| 5 |
+
// Feel free to contribute and make this better!
|
| 6 |
+
//
|
| 7 |
+
// See the tests/test-whisper.js for sample usage
|
| 8 |
+
//
|
| 9 |
+
|
| 10 |
#include "whisper.h"
|
| 11 |
|
| 12 |
#include <emscripten.h>
|
| 13 |
#include <emscripten/bind.h>
|
| 14 |
|
|
|
|
| 15 |
#include <thread>
|
| 16 |
+
#include <vector>
|
| 17 |
|
| 18 |
+
struct whisper_context * g_context;
|
|
|
|
|
|
|
| 19 |
|
| 20 |
EMSCRIPTEN_BINDINGS(whisper) {
|
| 21 |
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
| 22 |
+
if (g_context == nullptr) {
|
| 23 |
+
g_context = whisper_init(path_model.c_str());
|
| 24 |
+
if (g_context != nullptr) {
|
| 25 |
+
return true;
|
| 26 |
+
} else {
|
| 27 |
+
return false;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
}
|
| 29 |
}
|
| 30 |
|
| 31 |
+
return false;
|
| 32 |
}));
|
| 33 |
|
| 34 |
+
emscripten::function("free", emscripten::optional_override([]() {
|
| 35 |
+
if (g_context) {
|
| 36 |
+
whisper_free(g_context);
|
| 37 |
+
g_context = nullptr;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
}
|
| 39 |
}));
|
| 40 |
|
| 41 |
+
emscripten::function("full_default", emscripten::optional_override([](const emscripten::val & audio, const std::string & lang, bool translate) {
|
| 42 |
+
if (g_context == nullptr) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
return -1;
|
| 44 |
}
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
struct whisper_full_params params = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
|
| 47 |
|
| 48 |
params.print_realtime = true;
|
|
|
|
| 50 |
params.print_timestamps = true;
|
| 51 |
params.print_special = false;
|
| 52 |
params.translate = translate;
|
| 53 |
+
params.language = whisper_is_multilingual(g_context) ? lang.c_str() : "en";
|
| 54 |
params.n_threads = std::min(8, (int) std::thread::hardware_concurrency());
|
| 55 |
params.offset_ms = 0;
|
| 56 |
|
|
|
|
| 67 |
|
| 68 |
// print system information
|
| 69 |
{
|
| 70 |
+
printf("\n");
|
| 71 |
printf("system_info: n_threads = %d / %d | %s\n",
|
| 72 |
params.n_threads, std::thread::hardware_concurrency(), whisper_print_system_info());
|
| 73 |
|
| 74 |
+
printf("\n");
|
| 75 |
printf("%s: processing %d samples, %.1f sec, %d threads, %d processors, lang = %s, task = %s ...\n",
|
| 76 |
__func__, int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
|
| 77 |
params.n_threads, 1,
|
|
|
|
| 81 |
printf("\n");
|
| 82 |
}
|
| 83 |
|
| 84 |
+
// run whisper
|
| 85 |
{
|
| 86 |
+
whisper_reset_timings(g_context);
|
| 87 |
+
whisper_full(g_context, params, pcmf32.data(), pcmf32.size());
|
| 88 |
+
whisper_print_timings(g_context);
|
|
|
|
|
|
|
| 89 |
}
|
| 90 |
|
| 91 |
return 0;
|
bindings/javascript/libwhisper.worker.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"use strict";var Module={};var ENVIRONMENT_IS_NODE=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string";if(ENVIRONMENT_IS_NODE){var nodeWorkerThreads=require("worker_threads");var parentPort=nodeWorkerThreads.parentPort;parentPort.on("message",data=>onmessage({data:data}));var fs=require("fs");Object.assign(global,{self:global,require:require,Module:Module,location:{href:__filename},Worker:nodeWorkerThreads.Worker,importScripts:function(f){(0,eval)(fs.readFileSync(f,"utf8")+"//# sourceURL="+f)},postMessage:function(msg){parentPort.postMessage(msg)},performance:global.performance||{now:function(){return Date.now()}}})}var initializedJS=false;var pendingNotifiedProxyingQueues=[];function threadPrintErr(){var text=Array.prototype.slice.call(arguments).join(" ");if(ENVIRONMENT_IS_NODE){fs.writeSync(2,text+"\n");return}console.error(text)}function threadAlert(){var text=Array.prototype.slice.call(arguments).join(" ");postMessage({cmd:"alert",text:text,threadId:Module["_pthread_self"]()})}var err=threadPrintErr;self.alert=threadAlert;Module["instantiateWasm"]=(info,receiveInstance)=>{var instance=new WebAssembly.Instance(Module["wasmModule"],info);receiveInstance(instance);Module["wasmModule"]=null;return instance.exports};self.onunhandledrejection=e=>{throw e.reason??e};self.onmessage=e=>{try{if(e.data.cmd==="load"){Module["wasmModule"]=e.data.wasmModule;for(const handler of e.data.handlers){Module[handler]=function(){postMessage({cmd:"callHandler",handler:handler,args:[...arguments]})}}Module["wasmMemory"]=e.data.wasmMemory;Module["buffer"]=Module["wasmMemory"].buffer;Module["ENVIRONMENT_IS_PTHREAD"]=true;if(typeof e.data.urlOrBlob=="string"){importScripts(e.data.urlOrBlob)}else{var objectUrl=URL.createObjectURL(e.data.urlOrBlob);importScripts(objectUrl);URL.revokeObjectURL(objectUrl)}whisper_factory(Module).then(function(instance){Module=instance})}else 
if(e.data.cmd==="run"){Module["__performance_now_clock_drift"]=performance.now()-e.data.time;Module["__emscripten_thread_init"](e.data.pthread_ptr,0,0,1);Module["establishStackSpace"]();Module["PThread"].receiveObjectTransfer(e.data);Module["PThread"].threadInitTLS();if(!initializedJS){Module["__embind_initialize_bindings"]();pendingNotifiedProxyingQueues.forEach(queue=>{Module["executeNotifiedProxyingQueue"](queue)});pendingNotifiedProxyingQueues=[];initializedJS=true}try{Module["invokeEntryPoint"](e.data.start_routine,e.data.arg)}catch(ex){if(ex!="unwind"){if(ex instanceof Module["ExitStatus"]){if(Module["keepRuntimeAlive"]()){}else{Module["__emscripten_thread_exit"](ex.status)}}else{throw ex}}}}else if(e.data.cmd==="cancel"){if(Module["_pthread_self"]()){Module["__emscripten_thread_exit"](-1)}}else if(e.data.target==="setimmediate"){}else if(e.data.cmd==="processProxyingQueue"){if(initializedJS){Module["executeNotifiedProxyingQueue"](e.data.queue)}else{pendingNotifiedProxyingQueues.push(e.data.queue)}}else if(e.data.cmd){err("worker.js received unknown command "+e.data.cmd);err(e.data)}}catch(ex){if(Module["__emscripten_thread_crashed"]){Module["__emscripten_thread_crashed"]()}throw ex}};
|
bindings/javascript/package-tmpl.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "whisper.cpp",
|
| 3 |
+
"version": "@PROJECT_VERSION@",
|
| 4 |
+
"description": "Whisper speech recognition",
|
| 5 |
+
"main": "whisper.js",
|
| 6 |
+
"scripts": {
|
| 7 |
+
"test": "echo \"todo: add tests\" && exit 0"
|
| 8 |
+
},
|
| 9 |
+
"repository": {
|
| 10 |
+
"type": "git",
|
| 11 |
+
"url": "git+https://github.com/ggerganov/whisper.cpp"
|
| 12 |
+
},
|
| 13 |
+
"keywords": [
|
| 14 |
+
"openai",
|
| 15 |
+
"whisper",
|
| 16 |
+
"speech-to-text",
|
| 17 |
+
"speech-recognition",
|
| 18 |
+
"transformer"
|
| 19 |
+
],
|
| 20 |
+
"author": "Georgi Gerganov",
|
| 21 |
+
"license": "MIT",
|
| 22 |
+
"bugs": {
|
| 23 |
+
"url": "https://github.com/ggerganov/whisper.cpp/issues"
|
| 24 |
+
},
|
| 25 |
+
"homepage": "https://github.com/ggerganov/whisper.cpp#readme"
|
| 26 |
+
}
|
bindings/javascript/package.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "whisper.cpp",
|
| 3 |
+
"version": "1.0.0",
|
| 4 |
+
"description": "Whisper speech recognition",
|
| 5 |
+
"main": "whisper.js",
|
| 6 |
+
"scripts": {
|
| 7 |
+
"test": "echo \"todo: add tests\" && exit 0"
|
| 8 |
+
},
|
| 9 |
+
"repository": {
|
| 10 |
+
"type": "git",
|
| 11 |
+
"url": "git+https://github.com/ggerganov/whisper.cpp"
|
| 12 |
+
},
|
| 13 |
+
"keywords": [
|
| 14 |
+
"openai",
|
| 15 |
+
"whisper",
|
| 16 |
+
"speech-to-text",
|
| 17 |
+
"speech-recognition",
|
| 18 |
+
"transformer"
|
| 19 |
+
],
|
| 20 |
+
"author": "Georgi Gerganov",
|
| 21 |
+
"license": "MIT",
|
| 22 |
+
"bugs": {
|
| 23 |
+
"url": "https://github.com/ggerganov/whisper.cpp/issues"
|
| 24 |
+
},
|
| 25 |
+
"homepage": "https://github.com/ggerganov/whisper.cpp#readme"
|
| 26 |
+
}
|
bindings/javascript/whisper.js
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
examples/whisper.wasm/CMakeLists.txt
CHANGED
|
@@ -1,5 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
set(TARGET whisper.wasm)
|
| 2 |
|
| 3 |
-
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html
|
| 4 |
-
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../helpers.js
|
| 5 |
-
configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/whisper.js ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/whisper.js COPYONLY)
|
|
|
|
| 1 |
+
#
|
| 2 |
+
# libmain
|
| 3 |
+
#
|
| 4 |
+
|
| 5 |
+
set(TARGET libmain)
|
| 6 |
+
|
| 7 |
+
add_executable(${TARGET}
|
| 8 |
+
emscripten.cpp
|
| 9 |
+
)
|
| 10 |
+
|
| 11 |
+
target_link_libraries(${TARGET} PRIVATE
|
| 12 |
+
whisper
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
unset(EXTRA_FLAGS)
|
| 16 |
+
|
| 17 |
+
if (WHISPER_WASM_SINGLE_FILE)
|
| 18 |
+
set(EXTRA_FLAGS "-s SINGLE_FILE=1")
|
| 19 |
+
message(STATUS "Embedding WASM inside main.js")
|
| 20 |
+
|
| 21 |
+
add_custom_command(
|
| 22 |
+
TARGET ${TARGET} POST_BUILD
|
| 23 |
+
COMMAND ${CMAKE_COMMAND} -E copy
|
| 24 |
+
${CMAKE_BINARY_DIR}/bin/libmain.js
|
| 25 |
+
${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/whisper.wasm/main.js
|
| 26 |
+
)
|
| 27 |
+
endif()
|
| 28 |
+
|
| 29 |
+
set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
|
| 30 |
+
--bind \
|
| 31 |
+
-s USE_PTHREADS=1 \
|
| 32 |
+
-s PTHREAD_POOL_SIZE=8 \
|
| 33 |
+
-s INITIAL_MEMORY=1024MB \
|
| 34 |
+
-s TOTAL_MEMORY=1024MB \
|
| 35 |
+
-s FORCE_FILESYSTEM=1 \
|
| 36 |
+
-s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
|
| 37 |
+
${EXTRA_FLAGS} \
|
| 38 |
+
")
|
| 39 |
+
|
| 40 |
+
#
|
| 41 |
+
# whisper.wasm
|
| 42 |
+
#
|
| 43 |
+
|
| 44 |
set(TARGET whisper.wasm)
|
| 45 |
|
| 46 |
+
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
|
| 47 |
+
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../helpers.js ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/helpers.js @ONLY)
|
|
|
examples/whisper.wasm/emscripten.cpp
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "whisper.h"
|
| 2 |
+
|
| 3 |
+
#include <emscripten.h>
|
| 4 |
+
#include <emscripten/bind.h>
|
| 5 |
+
|
| 6 |
+
#include <vector>
|
| 7 |
+
#include <thread>
|
| 8 |
+
|
| 9 |
+
std::thread g_worker;
|
| 10 |
+
|
| 11 |
+
std::vector<struct whisper_context *> g_contexts(4, nullptr);
|
| 12 |
+
|
| 13 |
+
EMSCRIPTEN_BINDINGS(whisper) {
|
| 14 |
+
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
| 15 |
+
if (g_worker.joinable()) {
|
| 16 |
+
g_worker.join();
|
| 17 |
+
}
|
| 18 |
+
|
| 19 |
+
for (size_t i = 0; i < g_contexts.size(); ++i) {
|
| 20 |
+
if (g_contexts[i] == nullptr) {
|
| 21 |
+
g_contexts[i] = whisper_init(path_model.c_str());
|
| 22 |
+
if (g_contexts[i] != nullptr) {
|
| 23 |
+
return i + 1;
|
| 24 |
+
} else {
|
| 25 |
+
return (size_t) 0;
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
return (size_t) 0;
|
| 31 |
+
}));
|
| 32 |
+
|
| 33 |
+
emscripten::function("free", emscripten::optional_override([](size_t index) {
|
| 34 |
+
if (g_worker.joinable()) {
|
| 35 |
+
g_worker.join();
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
--index;
|
| 39 |
+
|
| 40 |
+
if (index < g_contexts.size()) {
|
| 41 |
+
whisper_free(g_contexts[index]);
|
| 42 |
+
g_contexts[index] = nullptr;
|
| 43 |
+
}
|
| 44 |
+
}));
|
| 45 |
+
|
| 46 |
+
emscripten::function("full_default", emscripten::optional_override([](size_t index, const emscripten::val & audio, const std::string & lang, bool translate) {
|
| 47 |
+
if (g_worker.joinable()) {
|
| 48 |
+
g_worker.join();
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
--index;
|
| 52 |
+
|
| 53 |
+
if (index >= g_contexts.size()) {
|
| 54 |
+
return -1;
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
if (g_contexts[index] == nullptr) {
|
| 58 |
+
return -2;
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
struct whisper_full_params params = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
|
| 62 |
+
|
| 63 |
+
params.print_realtime = true;
|
| 64 |
+
params.print_progress = false;
|
| 65 |
+
params.print_timestamps = true;
|
| 66 |
+
params.print_special = false;
|
| 67 |
+
params.translate = translate;
|
| 68 |
+
params.language = whisper_is_multilingual(g_contexts[index]) ? lang.c_str() : "en";
|
| 69 |
+
params.n_threads = std::min(8, (int) std::thread::hardware_concurrency());
|
| 70 |
+
params.offset_ms = 0;
|
| 71 |
+
|
| 72 |
+
std::vector<float> pcmf32;
|
| 73 |
+
const int n = audio["length"].as<int>();
|
| 74 |
+
|
| 75 |
+
emscripten::val heap = emscripten::val::module_property("HEAPU8");
|
| 76 |
+
emscripten::val memory = heap["buffer"];
|
| 77 |
+
|
| 78 |
+
pcmf32.resize(n);
|
| 79 |
+
|
| 80 |
+
emscripten::val memoryView = audio["constructor"].new_(memory, reinterpret_cast<uintptr_t>(pcmf32.data()), n);
|
| 81 |
+
memoryView.call<void>("set", audio);
|
| 82 |
+
|
| 83 |
+
// print system information
|
| 84 |
+
{
|
| 85 |
+
printf("system_info: n_threads = %d / %d | %s\n",
|
| 86 |
+
params.n_threads, std::thread::hardware_concurrency(), whisper_print_system_info());
|
| 87 |
+
|
| 88 |
+
printf("%s: processing %d samples, %.1f sec, %d threads, %d processors, lang = %s, task = %s ...\n",
|
| 89 |
+
__func__, int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
|
| 90 |
+
params.n_threads, 1,
|
| 91 |
+
params.language,
|
| 92 |
+
params.translate ? "translate" : "transcribe");
|
| 93 |
+
|
| 94 |
+
printf("\n");
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
// run the worker
|
| 98 |
+
{
|
| 99 |
+
g_worker = std::thread([index, params, pcmf32 = std::move(pcmf32)]() {
|
| 100 |
+
whisper_reset_timings(g_contexts[index]);
|
| 101 |
+
whisper_full(g_contexts[index], params, pcmf32.data(), pcmf32.size());
|
| 102 |
+
whisper_print_timings(g_contexts[index]);
|
| 103 |
+
});
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
return 0;
|
| 107 |
+
}));
|
| 108 |
+
}
|
examples/whisper.wasm/index-tmpl.html
CHANGED
|
@@ -550,6 +550,6 @@
|
|
| 550 |
}
|
| 551 |
}
|
| 552 |
</script>
|
| 553 |
-
<script type="text/javascript" src="
|
| 554 |
</body>
|
| 555 |
</html>
|
|
|
|
| 550 |
}
|
| 551 |
}
|
| 552 |
</script>
|
| 553 |
+
<script type="text/javascript" src="main.js"></script>
|
| 554 |
</body>
|
| 555 |
</html>
|
extra/deploy-wasm.sh
CHANGED
|
@@ -21,7 +21,7 @@ if [ $? -ne 0 ]; then
|
|
| 21 |
fi
|
| 22 |
|
| 23 |
# copy all wasm files to the node
|
| 24 |
-
scp bin/whisper.wasm/* root@linode0:/var/www/html/whisper/ && scp bin/
|
| 25 |
scp bin/stream.wasm/* root@linode0:/var/www/html/whisper/stream/ && scp bin/libstream.worker.js root@linode0:/var/www/html/whisper/stream/
|
| 26 |
scp bin/command.wasm/* root@linode0:/var/www/html/whisper/command/ && scp bin/libcommand.worker.js root@linode0:/var/www/html/whisper/command/
|
| 27 |
scp bin/talk.wasm/* root@linode0:/var/www/html/whisper/talk/ && scp bin/libtalk.worker.js root@linode0:/var/www/html/whisper/talk/
|
|
|
|
| 21 |
fi
|
| 22 |
|
| 23 |
# copy all wasm files to the node
|
| 24 |
+
scp bin/whisper.wasm/* root@linode0:/var/www/html/whisper/ && scp bin/libmain.worker.js root@linode0:/var/www/html/whisper/
|
| 25 |
scp bin/stream.wasm/* root@linode0:/var/www/html/whisper/stream/ && scp bin/libstream.worker.js root@linode0:/var/www/html/whisper/stream/
|
| 26 |
scp bin/command.wasm/* root@linode0:/var/www/html/whisper/command/ && scp bin/libcommand.worker.js root@linode0:/var/www/html/whisper/command/
|
| 27 |
scp bin/talk.wasm/* root@linode0:/var/www/html/whisper/talk/ && scp bin/libtalk.worker.js root@linode0:/var/www/html/whisper/talk/
|
tests/CMakeLists.txt
CHANGED
|
@@ -1,4 +1,14 @@
|
|
| 1 |
if (EMSCRIPTEN)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
return()
|
| 3 |
endif()
|
| 4 |
|
|
|
|
| 1 |
if (EMSCRIPTEN)
|
| 2 |
+
#
|
| 3 |
+
# test-whisper-js
|
| 4 |
+
|
| 5 |
+
set(TEST_TARGET test-whisper-js)
|
| 6 |
+
|
| 7 |
+
add_test(NAME ${TEST_TARGET}
|
| 8 |
+
COMMAND node test-whisper.js --experimental-wasm-threads
|
| 9 |
+
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
| 10 |
+
)
|
| 11 |
+
|
| 12 |
return()
|
| 13 |
endif()
|
| 14 |
|
tests/test-whisper.js
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
var factory = require('../bindings/javascript/whisper.js')
|
| 2 |
+
|
| 3 |
+
factory().then(function(whisper) {
|
| 4 |
+
var fs = require('fs');
|
| 5 |
+
|
| 6 |
+
// to avoid reading WAV files and depending on some 3rd-party package, we read
|
| 7 |
+
// 32-bit float PCM directly. to generate it:
|
| 8 |
+
//
|
| 9 |
+
// $ ffmpeg -i samples/jfk.wav -f f32le -acodec pcm_f32le samples/jfk.pcmf32
|
| 10 |
+
//
|
| 11 |
+
let fname_wav = "../samples/jfk.pcmf32";
|
| 12 |
+
let fname_model = "../models/ggml-base.en.bin";
|
| 13 |
+
|
| 14 |
+
// init whisper
|
| 15 |
+
{
|
| 16 |
+
// read binary data from file
|
| 17 |
+
var model_data = fs.readFileSync(fname_model);
|
| 18 |
+
if (model_data == null) {
|
| 19 |
+
console.log("whisper: failed to read model file");
|
| 20 |
+
process.exit(1);
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
// write binary data to WASM memory
|
| 24 |
+
whisper.FS_createDataFile("/", "whisper.bin", model_data, true, true);
|
| 25 |
+
|
| 26 |
+
// init the model
|
| 27 |
+
var ret = whisper.init("whisper.bin");
|
| 28 |
+
if (ret == false) {
|
| 29 |
+
console.log('whisper: failed to init');
|
| 30 |
+
process.exit(1);
|
| 31 |
+
}
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
// transcribe wav file
|
| 35 |
+
{
|
| 36 |
+
// read raw binary data
|
| 37 |
+
var pcm_data = fs.readFileSync(fname_wav);
|
| 38 |
+
if (pcm_data == null) {
|
| 39 |
+
console.log("whisper: failed to read wav file");
|
| 40 |
+
process.exit(1);
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
// convert to 32-bit float array
|
| 44 |
+
var pcm = new Float32Array(pcm_data.buffer);
|
| 45 |
+
|
| 46 |
+
// transcribe
|
| 47 |
+
var ret = whisper.full_default(pcm, "en", false);
|
| 48 |
+
if (ret != 0) {
|
| 49 |
+
console.log("whisper: failed to transcribe");
|
| 50 |
+
process.exit(1);
|
| 51 |
+
}
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
// free memory
|
| 55 |
+
{
|
| 56 |
+
whisper.free();
|
| 57 |
+
}
|
| 58 |
+
});
|