ggerganov committed
Commit 4312a39 · 1 Parent(s): 363140f

Update README.md and finalize the whisper.wasm example

CMakeLists.txt CHANGED
@@ -124,6 +124,7 @@ else()
         set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX2 /D_CRT_SECURE_NO_WARNINGS=1")
     else()
         if (EMSCRIPTEN)
+            # we require support for WASM SIMD 128-bit
             set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -msimd128")
             set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
         else()
Makefile CHANGED
@@ -90,7 +90,7 @@ libwhisper.a: ggml.o whisper.o
 	ar rcs libwhisper.a ggml.o whisper.o
 
 clean:
-	rm -f *.o main libwhisper.a
+	rm -f *.o main stream libwhisper.a
 
 #
 # Examples
README.md CHANGED
@@ -289,7 +289,7 @@ You can download the converted models using the [download-ggml-model.sh](downloa
 
 https://ggml.ggerganov.com
 
-For more details, see the conversion script [convert-pt-to-ggml.py](convert-pt-to-ggml.py) or the README in [models](models).
+For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or the README in [models](models).
 
 ## Bindings
 
examples/whisper.wasm/README.md CHANGED
@@ -1,3 +1,27 @@
 # whisper.wasm
 
-Live demo: https://whisper.ggerganov.com
+Inference of [OpenAI's Whisper ASR model](https://github.com/openai/whisper) inside the browser
+
+This example uses a WebAssembly (WASM) port of the [whisper.cpp](https://github.com/ggerganov/whisper.cpp)
+implementation of the transformer to run the inference inside a web page. The audio data does not leave your computer -
+it is processed locally on your machine. The performance is not great, but you should be able to achieve 2x or 3x
+real-time for the `tiny` and `base` models on a modern CPU and browser (i.e. transcribe 60 seconds of audio in about
+20-30 seconds).
+
+This WASM port utilizes [WASM SIMD 128-bit intrinsics](https://emcc.zcopy.site/docs/porting/simd/), so you have to make
+sure that [your browser supports them](https://webassembly.org/roadmap/).
+
+The example is capable of running all models up to size `small` inclusive. Beyond that, the memory requirements and
+performance are unsatisfactory. The implementation currently supports only the `Greedy` sampling strategy. Both
+transcription and translation are supported.
+
+Since the model data is quite big (74MB for the `tiny` model), you need to manually load the model into the web page.
+
+The example supports both loading audio from a file and recording audio from the microphone. The maximum length of the
+audio is limited to 120 seconds.
+
+## Live demo
+
+Link: https://whisper.ggerganov.com
+
+![image](https://user-images.githubusercontent.com/1991296/197348344-1a7fead8-3dae-4922-8b06-df223a206603.png)
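
For context, a minimal sketch of building the whisper.wasm example with Emscripten; the build directory name and the stock CMake flow are assumptions, so adjust to your setup:

```bash
# assumes the Emscripten SDK is installed and activated (emcmake/emcc on PATH)
git clone https://github.com/ggerganov/whisper.cpp
cd whisper.cpp
mkdir build-em && cd build-em
emcmake cmake ..
make -j

# the generated page must be served over HTTP (not file://) and opened in a
# browser with WASM SIMD and threads support, per the links above
```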
examples/whisper.wasm/index-tmpl.html CHANGED
@@ -162,7 +162,7 @@
     </tr>
 </table>
 
-<br><br>
+<br>
 
 <!-- textarea with height filling the rest of the page -->
 <textarea id="output" rows="20"></textarea>
@@ -254,6 +254,10 @@
     return new type(buffer);
 }
 
+//
+// load model
+//
+
 function loadFile(event, fname) {
     var file = event.target.files[0] || null;
     if (file == null) {
@@ -281,6 +285,10 @@
     reader.readAsArrayBuffer(file);
 }
 
+//
+// audio file
+//
+
 function loadAudio(event) {
     if (!context) {
         context = new AudioContext({sampleRate: 16000});
@@ -327,7 +335,7 @@
 }
 
 //
-// Microphone
+// microphone
 //
 
 var mediaRecorder = null;
extra/convert-all.sh CHANGED
@@ -3,6 +3,6 @@
 models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" )
 
 for model in "${models[@]}"; do
-    python3 convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/
+    python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/
     mv -v models/ggml-model.bin models/ggml-$model.bin
 done
convert-pt-to-ggml.py → models/convert-pt-to-ggml.py RENAMED
File without changes
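
For reference, a minimal sketch of invoking the relocated conversion script for a single model, mirroring the call in extra/convert-all.sh; the checkpoint path and the ../whisper clone location are assumptions taken from that script:

```bash
# assumes the original PyTorch checkpoint was downloaded by openai/whisper
# into ~/.cache/whisper/ and that a clone of the whisper repo sits at ../whisper
python3 models/convert-pt-to-ggml.py ~/.cache/whisper/base.en.pt ../whisper models/

# the script writes models/ggml-model.bin; rename it per model, as convert-all.sh does
mv -v models/ggml-model.bin models/ggml-base.en.bin
```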