Spaces:
Running
Running
Update README.md and finalize the whisper.wasm example
Browse files- CMakeLists.txt +1 -0
- Makefile +1 -1
- README.md +1 -1
- examples/whisper.wasm/README.md +25 -1
- examples/whisper.wasm/index-tmpl.html +10 -2
- extra/convert-all.sh +1 -1
- convert-pt-to-ggml.py → models/convert-pt-to-ggml.py +0 -0
CMakeLists.txt
CHANGED
|
@@ -124,6 +124,7 @@ else()
|
|
| 124 |
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX2 /D_CRT_SECURE_NO_WARNINGS=1")
|
| 125 |
else()
|
| 126 |
if (EMSCRIPTEN)
|
|
|
|
| 127 |
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -msimd128")
|
| 128 |
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
| 129 |
else()
|
|
|
|
| 124 |
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX2 /D_CRT_SECURE_NO_WARNINGS=1")
|
| 125 |
else()
|
| 126 |
if (EMSCRIPTEN)
|
| 127 |
+
# we require support for WASM SIMD 128-bit
|
| 128 |
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -msimd128")
|
| 129 |
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
| 130 |
else()
|
Makefile
CHANGED
|
@@ -90,7 +90,7 @@ libwhisper.a: ggml.o whisper.o
|
|
| 90 |
ar rcs libwhisper.a ggml.o whisper.o
|
| 91 |
|
| 92 |
clean:
|
| 93 |
-
rm -f *.o main libwhisper.a
|
| 94 |
|
| 95 |
#
|
| 96 |
# Examples
|
|
|
|
| 90 |
ar rcs libwhisper.a ggml.o whisper.o
|
| 91 |
|
| 92 |
clean:
|
| 93 |
+
rm -f *.o main stream libwhisper.a
|
| 94 |
|
| 95 |
#
|
| 96 |
# Examples
|
README.md
CHANGED
|
@@ -289,7 +289,7 @@ You can download the converted models using the [download-ggml-model.sh](downloa
|
|
| 289 |
|
| 290 |
https://ggml.ggerganov.com
|
| 291 |
|
| 292 |
-
For more details, see the conversion script [convert-pt-to-ggml.py](convert-pt-to-ggml.py) or the README in [models](models).
|
| 293 |
|
| 294 |
## Bindings
|
| 295 |
|
|
|
|
| 289 |
|
| 290 |
https://ggml.ggerganov.com
|
| 291 |
|
| 292 |
+
For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or the README in [models](models).
|
| 293 |
|
| 294 |
## Bindings
|
| 295 |
|
examples/whisper.wasm/README.md
CHANGED
|
@@ -1,3 +1,27 @@
|
|
| 1 |
# whisper.wasm
|
| 2 |
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# whisper.wasm
|
| 2 |
|
| 3 |
+
Inference of [OpenAI's Whisper ASR model](https://github.com/openai/whisper) inside the browser
|
| 4 |
+
|
| 5 |
+
This example uses a WebAssembly (WASM) port of the [whisper.cpp](https://github.com/ggerganov/whisper.cpp)
|
| 6 |
+
implementation of the transformer to run the inference inside a web page. The audio data does not leave your computer -
|
| 7 |
+
it is processed locally on your machine. The performance is not great but you should be able to achieve x2 or x3
|
| 8 |
+
real-time for the `tiny` and `base` models on a modern CPU and browser (e.g. transcribe 60 seconds of audio in
|
| 9 |
+
~20-30 seconds).
|
| 10 |
+
|
| 11 |
+
This WASM port utilizes [WASM SIMD 128-bit intrinsics](https://emcc.zcopy.site/docs/porting/simd/) so you have to make
|
| 12 |
+
sure that [your browser supports them](https://webassembly.org/roadmap/).
|
| 13 |
+
|
| 14 |
+
The example is capable of running all models up to size `small` inclusive. Beyond that, the memory requirements and
|
| 15 |
+
performance are unsatisfactory. The implementation currently supports only the `Greedy` sampling strategy. Both
|
| 16 |
+
transcription and translation are supported.
|
| 17 |
+
|
| 18 |
+
Since the model data is quite big (74MB for the `tiny` model), you need to manually load the model into the web page.
|
| 19 |
+
|
| 20 |
+
The example supports both loading audio from a file and recording audio from the microphone. The maximum length of the
|
| 21 |
+
audio is limited to 120 seconds.
|
| 22 |
+
|
| 23 |
+
## Live demo
|
| 24 |
+
|
| 25 |
+
Link: https://whisper.ggerganov.com
|
| 26 |
+
|
| 27 |
+

|
examples/whisper.wasm/index-tmpl.html
CHANGED
|
@@ -162,7 +162,7 @@
|
|
| 162 |
</tr>
|
| 163 |
</table>
|
| 164 |
|
| 165 |
-
<br
|
| 166 |
|
| 167 |
<!-- textarea with height filling the rest of the page -->
|
| 168 |
<textarea id="output" rows="20"></textarea>
|
|
@@ -254,6 +254,10 @@
|
|
| 254 |
return new type(buffer);
|
| 255 |
}
|
| 256 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
function loadFile(event, fname) {
|
| 258 |
var file = event.target.files[0] || null;
|
| 259 |
if (file == null) {
|
|
@@ -281,6 +285,10 @@
|
|
| 281 |
reader.readAsArrayBuffer(file);
|
| 282 |
}
|
| 283 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 284 |
function loadAudio(event) {
|
| 285 |
if (!context) {
|
| 286 |
context = new AudioContext({sampleRate: 16000});
|
|
@@ -327,7 +335,7 @@
|
|
| 327 |
}
|
| 328 |
|
| 329 |
//
|
| 330 |
-
//
|
| 331 |
//
|
| 332 |
|
| 333 |
var mediaRecorder = null;
|
|
|
|
| 162 |
</tr>
|
| 163 |
</table>
|
| 164 |
|
| 165 |
+
<br>
|
| 166 |
|
| 167 |
<!-- textarea with height filling the rest of the page -->
|
| 168 |
<textarea id="output" rows="20"></textarea>
|
|
|
|
| 254 |
return new type(buffer);
|
| 255 |
}
|
| 256 |
|
| 257 |
+
//
|
| 258 |
+
// load model
|
| 259 |
+
//
|
| 260 |
+
|
| 261 |
function loadFile(event, fname) {
|
| 262 |
var file = event.target.files[0] || null;
|
| 263 |
if (file == null) {
|
|
|
|
| 285 |
reader.readAsArrayBuffer(file);
|
| 286 |
}
|
| 287 |
|
| 288 |
+
//
|
| 289 |
+
// audio file
|
| 290 |
+
//
|
| 291 |
+
|
| 292 |
function loadAudio(event) {
|
| 293 |
if (!context) {
|
| 294 |
context = new AudioContext({sampleRate: 16000});
|
|
|
|
| 335 |
}
|
| 336 |
|
| 337 |
//
|
| 338 |
+
// microphone
|
| 339 |
//
|
| 340 |
|
| 341 |
var mediaRecorder = null;
|
extra/convert-all.sh
CHANGED
|
@@ -3,6 +3,6 @@
|
|
| 3 |
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" )
|
| 4 |
|
| 5 |
for model in "${models[@]}"; do
|
| 6 |
-
python3 convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/
|
| 7 |
mv -v models/ggml-model.bin models/ggml-$model.bin
|
| 8 |
done
|
|
|
|
| 3 |
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" )
|
| 4 |
|
| 5 |
for model in "${models[@]}"; do
|
| 6 |
+
python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/
|
| 7 |
mv -v models/ggml-model.bin models/ggml-$model.bin
|
| 8 |
done
|
convert-pt-to-ggml.py → models/convert-pt-to-ggml.py
RENAMED
|
File without changes
|