Spaces:

natasa365
/

whisper.cpp

Sleeping

App Files Files Community

ggerganov committed on Dec 11, 2022

Commit

68dae1f

unverified ·

1 Parent(s): d161cee

bench.wasm : same as "bench" but runs in the browser (#89)

Browse files

Files changed (9) hide show

README.md +1 -1
bindings/javascript/whisper.js +0 -0
examples/CMakeLists.txt +1 -0
examples/bench.wasm/CMakeLists.txt +47 -0
examples/bench.wasm/README.md +22 -0
examples/bench.wasm/emscripten.cpp +80 -0
examples/bench.wasm/index-tmpl.html +227 -0
examples/bench/README.md +3 -1
extra/deploy-wasm.sh +1 -0

README.md CHANGED Viewed

@@ -459,7 +459,7 @@ Some of the examples are even ported to run in the browser using WebAssembly. Ch
 | Example | Web | Description |
 | ---     | --- | ---         |
 | [main](examples/main) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper |
-| [bench](examples/bench) | | Benchmark the performance of Whisper on your machine |
 | [stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
 | [command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
 | [talk](examples/talk) | [talk.wasm](examples/talk.wasm) | Talk with a GPT-2 bot |

 | Example | Web | Description |
 | ---     | --- | ---         |
 | [main](examples/main) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper |
+| [bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
 | [stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
 | [command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
 | [talk](examples/talk) | [talk.wasm](examples/talk.wasm) | Talk with a GPT-2 bot |

bindings/javascript/whisper.js CHANGED Viewed

The diff for this file is too large to render. See raw diff

examples/CMakeLists.txt CHANGED Viewed

@@ -23,6 +23,7 @@ if (EMSCRIPTEN)
     add_subdirectory(stream.wasm)
     add_subdirectory(command.wasm)
     add_subdirectory(talk.wasm)
 else()
     add_subdirectory(main)
     add_subdirectory(stream)

     add_subdirectory(stream.wasm)
     add_subdirectory(command.wasm)
     add_subdirectory(talk.wasm)
+    add_subdirectory(bench.wasm)
 else()
     add_subdirectory(main)
     add_subdirectory(stream)

examples/bench.wasm/CMakeLists.txt ADDED Viewed

	@@ -0,0 +1,47 @@

+#
+# libbench : the benchmark compiled with Emscripten (emscripten.cpp linked against whisper)
+#
+set(TARGET libbench)
+add_executable(${TARGET} emscripten.cpp)
+target_link_libraries(${TARGET} PRIVATE whisper)
+# EXTRA_FLAGS stays empty unless the single-file build is requested
+unset(EXTRA_FLAGS)
+if (WHISPER_WASM_SINGLE_FILE)
+    set(EXTRA_FLAGS "-s SINGLE_FILE=1")
+    message(STATUS "Embedding WASM inside bench.js")
+    # once the target is built, copy libbench.js next to the page under the name bench.js
+    add_custom_command(
+        TARGET ${TARGET} POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/bin/libbench.js ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/bench.wasm/bench.js
+        )
+endif()
+set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
+    --bind \
+    -s USE_PTHREADS=1 \
+    -s PTHREAD_POOL_SIZE=8 \
+    -s INITIAL_MEMORY=1024MB \
+    -s TOTAL_MEMORY=1024MB \
+    -s FORCE_FILESYSTEM=1 \
+    -s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
+    ${EXTRA_FLAGS} \
+    ")
+#
+# bench.wasm : the web page (index.html generated from the template, plus the shared helpers.js)
+#
+set(TARGET bench.wasm)
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html  ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../helpers.js    ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/helpers.js @ONLY)

examples/bench.wasm/README.md ADDED Viewed

	@@ -0,0 +1,22 @@

+# bench.wasm
+Benchmark the performance of whisper.cpp in the browser using WebAssembly
+Link: https://whisper.ggerganov.com/bench/
+Terminal version: [examples/bench](/examples/bench)
+## Build instructions
+```bash
+# build using Emscripten (v3.1.2)
+git clone https://github.com/ggerganov/whisper.cpp
+cd whisper.cpp
+mkdir build-em && cd build-em
+emcmake cmake ..
+make -j
+# copy the produced page to your HTTP path
+cp bin/bench.wasm/*       /path/to/html/
+cp bin/libbench.worker.js /path/to/html/
+```

examples/bench.wasm/emscripten.cpp ADDED Viewed

	@@ -0,0 +1,80 @@

+#include "whisper.h"
+#include <emscripten.h>
+#include <emscripten/bind.h>
+#include <algorithm>
+#include <cmath>
+#include <cstdio>
+#include <string>
+#include <thread>
+#include <vector>
+constexpr int N_THREAD = 8; // upper bound on the number of threads bench_main asks for
+// TODO: get rid of this vector of contexts - bad idea in the first place
+std::vector<struct whisper_context *> g_contexts(4, nullptr); // 4 context slots, a nullptr entry is a free slot
+std::thread g_worker; // thread running bench_main, started by the "init" binding
+// Runs the encoder benchmark on the context stored in g_contexts[index] and prints the
+// timings, followed by instructions for submitting the results, to stderr.
+//
+// index - slot in g_contexts; the "init" binding only starts this function for a slot
+//         whose context was successfully created by whisper_init
+//
+// Returns early (after logging the error code) if setting the mel data or encoding fails.
+void bench_main(size_t index) {
+    // hardware_concurrency() is allowed to return 0 when the value cannot be determined,
+    // so make sure that at least one thread is always requested
+    const int n_threads = std::max(1, std::min(N_THREAD, (int) std::thread::hardware_concurrency()));
+    // whisper context
+    auto & ctx = g_contexts[index];
+    fprintf(stderr, "%s: running benchmark with %d threads - please wait...\n", __func__, n_threads);
+    if (int ret = whisper_set_mel(ctx, nullptr, 0, WHISPER_N_MEL)) {
+        fprintf(stderr, "error: failed to set mel: %d\n", ret);
+        return;
+    }
+    // note: no "!= 0" inside the condition - with it, ret would be initialized with the result
+    //       of the comparison (always 1 on failure) instead of the actual error code
+    if (int ret = whisper_encode(ctx, 0, n_threads)) {
+        fprintf(stderr, "error: failed to encode model: %d\n", ret);
+        return;
+    }
+    whisper_print_timings(ctx);
+    fprintf(stderr, "\n");
+    fprintf(stderr, "If you wish, you can submit these results here:\n");
+    fprintf(stderr, "\n");
+    fprintf(stderr, "  https://github.com/ggerganov/whisper.cpp/issues/89\n");
+    fprintf(stderr, "\n");
+    fprintf(stderr, "Please include the following information:\n");
+    fprintf(stderr, "\n");
+    fprintf(stderr, "  - CPU model\n");
+    fprintf(stderr, "  - Operating system\n");
+    fprintf(stderr, "  - Browser\n");
+    fprintf(stderr, "\n");
+}
+// JavaScript bindings:
+//
+//   init(path_model) - loads the model into the first free slot of g_contexts, starts
+//                      bench_main for it on g_worker and returns the handle "slot + 1";
+//                      returns 0 if the model could not be loaded or no slot is free
+//   free(handle)     - releases the context that belongs to a handle returned by init
+EMSCRIPTEN_BINDINGS(bench) {
+    emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
+        for (size_t i = 0; i < g_contexts.size(); ++i) {
+            if (g_contexts[i] == nullptr) {
+                g_contexts[i] = whisper_init(path_model.c_str());
+                if (g_contexts[i] != nullptr) {
+                    // a joinable std::thread must be joined before it can be reassigned
+                    if (g_worker.joinable()) {
+                        g_worker.join();
+                    }
+                    g_worker = std::thread([i]() {
+                        bench_main(i);
+                    });
+                    // handles are 1-based so that 0 can be used to signal failure
+                    return i + 1;
+                } else {
+                    return (size_t) 0;
+                }
+            }
+        }
+        return (size_t) 0;
+    }));
+    emscripten::function("free", emscripten::optional_override([](size_t index) {
+        // the benchmark thread may still be using the context - wait for it before freeing
+        if (g_worker.joinable()) {
+            g_worker.join();
+        }
+        // "init" hands out 1-based handles - convert back to the slot index
+        // (a handle of 0 wraps around and is rejected by the bounds check below)
+        --index;
+        if (index < g_contexts.size()) {
+            whisper_free(g_contexts[index]);
+            g_contexts[index] = nullptr;
+        }
+    }));
+}

examples/bench.wasm/index-tmpl.html ADDED Viewed

	@@ -0,0 +1,227 @@

+<!doctype html>
+<html lang="en-us">
+    <head>
+        <title>bench : Benchmark whisper.cpp performance in the browser</title>
+        <style>
+            #output {
+                width: 100%;
+                height: 100%;
+                margin: 0 auto;
+                margin-top: 10px;
+                border-left: 0px;
+                border-right: 0px;
+                padding-left: 0px;
+                padding-right: 0px;
+                display: block;
+                background-color: black;
+                color: white;
+                font-size: 10px;
+                font-family: 'Lucida Console', Monaco, monospace;
+                outline: none;
+                white-space: pre;
+                overflow-wrap: normal;
+                overflow-x: scroll;
+            }
+        </style>
+    </head>
+    <body>
+        <div id="main-container">
+            <b>bench : Benchmark whisper.cpp performance in the browser</b>
+            <br><br>
+            You can find more about this project on <a href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/bench.wasm">GitHub</a>.
+            <br><br>
+            <hr>
+            Select the model you would like to use and click the "Bench" button.<br>
+            The results will be displayed in the textarea below.
+            <br><br>
+            <div id="model-whisper">
+                Whisper model: <span id="model-whisper-status"></span>
+                <button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
+                <button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">base.en (142 MB)</button>
+                <span id="fetch-whisper-progress"></span>
+                <input type="file" id="whisper-file" name="file" onchange="loadFile(event, 'whisper.bin')" />
+            </div>
+            <br>
+            <div id="input">
+                <button id="bench" onclick="onBench()" disabled>Bench</button>
+                <button id="clear" onclick="clearCache()">Clear Cache</button>
+            </div>
+            <hr>
+            Debug output:
+            <textarea id="output" rows="20"></textarea>
+            <br>
+            <b>Troubleshooting</b>
+            <br><br>
+            The page does some heavy computations, so make sure:
+            <ul>
+                <li>To use a modern web browser (e.g. Chrome, Firefox)</li>
+                <li>To use a fast desktop or laptop computer (i.e. not a mobile phone)</li>
+                <li>Your browser supports WASM <a href="https://webassembly.org/roadmap/">Fixed-width SIMD</a></li>
+            </ul>
+            <div class="cell-version">
+                <span>
+                    |
+                    Build time: <span class="nav-link">@GIT_DATE@</span> |
+                    Commit hash: <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/commit/@GIT_SHA1@">@GIT_SHA1@</a> |
+                    Commit subject: <span class="nav-link">@GIT_COMMIT_SUBJECT@</span> |
+                    <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/bench.wasm">Source Code</a> |
+                </span>
+            </div>
+        </div>
+        <script type="text/javascript" src="helpers.js"></script>
+        <script type='text/javascript'>
+            // handle of the current bench instance as returned by Module.init (null until the first run)
+            var instance = null;
+            // name of the model file stored by storeFS (null while no model is loaded)
+            var model_whisper = null;
+            var Module = {
+                print: printTextarea, // printTextarea is not defined on this page - presumably provided by helpers.js
+                printErr: printTextarea,
+                setStatus: function(text) {
+                    printTextarea('js: ' + text);
+                },
+                monitorRunDependencies: function(left) { // no-op
+                },
+                preRun: function() {
+                    printTextarea('js: Preparing ...');
+                },
+                postRun: function() {
+                    printTextarea('js: Initialized successfully!');
+                }
+            };
+            //
+            // fetch models
+            //
+            // NOTE(review): dbVersion, dbName and indexedDB are not referenced by the code on this
+            // page - presumably they are consumed by helpers.js (loadRemote); confirm
+            //
+            let dbVersion = 1
+            let dbName    = 'whisper.ggerganov.com';
+            let indexedDB = window.indexedDB || window.mozIndexedDB || window.webkitIndexedDB || window.msIndexedDB
+            function storeFS(fname, buf) {
+                // write to WASM file using FS_createDataFile
+                // if the file exists, delete it
+                try {
+                    Module.FS_unlink(fname);
+                } catch (e) {
+                    // ignore
+                }
+                Module.FS_createDataFile("/", fname, buf, true, true);
+                printTextarea('storeFS: stored model: ' + fname + ' size: ' + buf.length);
+                model_whisper = fname;
+                document.getElementById('model-whisper-status').innerHTML = 'loaded "' + model_whisper + '"!';
+                if (model_whisper != null) {
+                    document.getElementById('bench').disabled = false;
+                }
+            }
+            function loadFile(event, fname) {
+                var file = event.target.files[0] || null;
+                if (file == null) {
+                    return;
+                }
+                printTextarea("loadFile: loading model: " + file.name + ", size: " + file.size + " bytes");
+                printTextarea('loadFile: please wait ...');
+                var reader = new FileReader();
+                reader.onload = function(event) {
+                    var buf = new Uint8Array(reader.result);
+                    storeFS(fname, buf);
+                }
+                reader.readAsArrayBuffer(file);
+                document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
+                document.getElementById('fetch-whisper-base-en').style.display = 'none';
+                document.getElementById('whisper-file'         ).style.display = 'none';
+                document.getElementById('model-whisper-status' ).innerHTML = 'loaded model: ' + file.name;
+            }
+            function loadWhisper(model) {
+                let urls = {
+                    'tiny.en': 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en.bin',
+                    'base.en': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en.bin',
+                };
+                let sizes = {
+                    'tiny.en': 75,
+                    'base.en': 142,
+                };
+                let url     = urls[model];
+                let dst     = 'whisper.bin';
+                let size_mb = sizes[model];
+                document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
+                document.getElementById('fetch-whisper-base-en').style.display = 'none';
+                document.getElementById('model-whisper-status').innerHTML = 'loading "' + model + '" ... ';
+                cbProgress = function(p) {
+                    let el = document.getElementById('fetch-whisper-progress');
+                    el.innerHTML = Math.round(100*p) + '%';
+                };
+                cbCancel = function() {
+                    var el;
+                    el = document.getElementById('fetch-whisper-tiny-en'); if (el) el.style.display = 'inline-block';
+                    el = document.getElementById('fetch-whisper-base-en'); if (el) el.style.display = 'inline-block';
+                    el = document.getElementById('model-whisper-status');  if (el) el.innerHTML = '';
+                };
+                loadRemote(url, dst, size_mb, cbProgress, storeFS, cbCancel, printTextarea);
+            }
+            //
+            // main
+            //
+            function onBench() {
+                if (instance) {
+                    Module.free(instance);
+                }
+                instance = Module.init('whisper.bin');
+                if (instance) {
+                    printTextarea("js: whisper initialized, instance: " + instance);
+                }
+                document.getElementById('bench').disabled = true;
+                if (!instance) {
+                    printTextarea("js: failed to initialize whisper");
+                    return;
+                }
+            }
+        </script>
+        <script type="text/javascript" src="bench.js"></script>
+    </body>
+</html>

examples/bench/README.md CHANGED Viewed

@@ -1,6 +1,8 @@
 # bench
-A very basic tool for benchmarking the inference performance on your device. The tool simply runs the Encoder part of the transformer on some random audio data and records the execution time. This way we can have an objective comparison of the performance of the model for various setups.
 Benchmark results are tracked in the following Github issue: https://github.com/ggerganov/whisper.cpp/issues/89

 # bench
+A very basic tool for benchmarking the inference performance on your device. The tool simply runs the Encoder part of
+the transformer on some random audio data and records the execution time. This way we can have an objective comparison
+of the performance of the model for various setups.
 Benchmark results are tracked in the following Github issue: https://github.com/ggerganov/whisper.cpp/issues/89

extra/deploy-wasm.sh CHANGED Viewed

@@ -25,6 +25,7 @@ scp bin/whisper.wasm/* root@linode0:/var/www/html/whisper/         && scp bin/li
 scp bin/stream.wasm/*  root@linode0:/var/www/html/whisper/stream/  && scp bin/libstream.worker.js  root@linode0:/var/www/html/whisper/stream/
 scp bin/command.wasm/* root@linode0:/var/www/html/whisper/command/ && scp bin/libcommand.worker.js root@linode0:/var/www/html/whisper/command/
 scp bin/talk.wasm/*    root@linode0:/var/www/html/whisper/talk/    && scp bin/libtalk.worker.js    root@linode0:/var/www/html/whisper/talk/
 echo "Done"
 exit

 scp bin/stream.wasm/*  root@linode0:/var/www/html/whisper/stream/  && scp bin/libstream.worker.js  root@linode0:/var/www/html/whisper/stream/
 scp bin/command.wasm/* root@linode0:/var/www/html/whisper/command/ && scp bin/libcommand.worker.js root@linode0:/var/www/html/whisper/command/
 scp bin/talk.wasm/*    root@linode0:/var/www/html/whisper/talk/    && scp bin/libtalk.worker.js    root@linode0:/var/www/html/whisper/talk/
+scp bin/bench.wasm/*   root@linode0:/var/www/html/whisper/bench/   && scp bin/libbench.worker.js   root@linode0:/var/www/html/whisper/bench/
 echo "Done"
 exit