Spaces:
Running
Running
Adding sanitizer tests
Browse files
- .github/workflows/build.yml +3 -3
- .gitignore +1 -0
- tests/CMakeLists.txt +62 -0
- whisper.cpp +9 -2
- whisper.h +6 -5
.github/workflows/build.yml
CHANGED
|
@@ -61,7 +61,7 @@ jobs:
|
|
| 61 |
- name: Build
|
| 62 |
run: |
|
| 63 |
make
|
| 64 |
-
ctest --output-on-failure
|
| 65 |
|
| 66 |
ubuntu-latest-clang:
|
| 67 |
runs-on: ubuntu-latest
|
|
@@ -87,7 +87,7 @@ jobs:
|
|
| 87 |
- name: Build
|
| 88 |
run: |
|
| 89 |
make
|
| 90 |
-
ctest --output-on-failure
|
| 91 |
|
| 92 |
ubuntu-latest-gcc-sanitized:
|
| 93 |
runs-on: ubuntu-latest
|
|
@@ -112,4 +112,4 @@ jobs:
|
|
| 112 |
- name: Build
|
| 113 |
run: |
|
| 114 |
make
|
| 115 |
-
ctest --output-on-failure
|
|
|
|
| 61 |
- name: Build
|
| 62 |
run: |
|
| 63 |
make
|
| 64 |
+
ctest -L gh --output-on-failure
|
| 65 |
|
| 66 |
ubuntu-latest-clang:
|
| 67 |
runs-on: ubuntu-latest
|
|
|
|
| 87 |
- name: Build
|
| 88 |
run: |
|
| 89 |
make
|
| 90 |
+
ctest -L gh --output-on-failure
|
| 91 |
|
| 92 |
ubuntu-latest-gcc-sanitized:
|
| 93 |
runs-on: ubuntu-latest
|
|
|
|
| 112 |
- name: Build
|
| 113 |
run: |
|
| 114 |
make
|
| 115 |
+
ctest -L gh --output-on-failure
|
.gitignore
CHANGED
|
@@ -4,3 +4,4 @@ stream
|
|
| 4 |
*.o
|
| 5 |
.cache
|
| 6 |
build/
|
|
|
|
|
|
| 4 |
*.o
|
| 5 |
.cache
|
| 6 |
build/
|
| 7 |
+
compile_commands.json
|
tests/CMakeLists.txt
CHANGED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
set(TEST_TARGET test-main-tiny)
|
| 2 |
+
add_test(NAME ${TEST_TARGET}
|
| 3 |
+
COMMAND $<TARGET_FILE:main>
|
| 4 |
+
-m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-tiny.bin
|
| 5 |
+
-f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
|
| 6 |
+
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "tiny;gh")
|
| 7 |
+
|
| 8 |
+
set(TEST_TARGET test-main-tiny.en)
|
| 9 |
+
add_test(NAME ${TEST_TARGET}
|
| 10 |
+
COMMAND $<TARGET_FILE:main>
|
| 11 |
+
-m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-tiny.en.bin
|
| 12 |
+
-f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
|
| 13 |
+
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "tiny;en;gh")
|
| 14 |
+
|
| 15 |
+
set(TEST_TARGET test-main-base)
|
| 16 |
+
add_test(NAME ${TEST_TARGET}
|
| 17 |
+
COMMAND $<TARGET_FILE:main>
|
| 18 |
+
-m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-base.bin
|
| 19 |
+
-f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
|
| 20 |
+
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "base")
|
| 21 |
+
|
| 22 |
+
set(TEST_TARGET test-main-base.en)
|
| 23 |
+
add_test(NAME ${TEST_TARGET}
|
| 24 |
+
COMMAND $<TARGET_FILE:main>
|
| 25 |
+
-m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-base.en.bin
|
| 26 |
+
-f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
|
| 27 |
+
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "base;en")
|
| 28 |
+
|
| 29 |
+
set(TEST_TARGET test-main-small)
|
| 30 |
+
add_test(NAME ${TEST_TARGET}
|
| 31 |
+
COMMAND $<TARGET_FILE:main>
|
| 32 |
+
-m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-small.bin
|
| 33 |
+
-f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
|
| 34 |
+
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "small")
|
| 35 |
+
|
| 36 |
+
set(TEST_TARGET test-main-small.en)
|
| 37 |
+
add_test(NAME ${TEST_TARGET}
|
| 38 |
+
COMMAND $<TARGET_FILE:main>
|
| 39 |
+
-m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-small.en.bin
|
| 40 |
+
-f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
|
| 41 |
+
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "small;en")
|
| 42 |
+
|
| 43 |
+
set(TEST_TARGET test-main-medium)
|
| 44 |
+
add_test(NAME ${TEST_TARGET}
|
| 45 |
+
COMMAND $<TARGET_FILE:main>
|
| 46 |
+
-m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-medium.bin
|
| 47 |
+
-f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
|
| 48 |
+
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "medium")
|
| 49 |
+
|
| 50 |
+
set(TEST_TARGET test-main-medium.en)
|
| 51 |
+
add_test(NAME ${TEST_TARGET}
|
| 52 |
+
COMMAND $<TARGET_FILE:main>
|
| 53 |
+
-m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-medium.en.bin
|
| 54 |
+
-f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
|
| 55 |
+
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "medium;en")
|
| 56 |
+
|
| 57 |
+
set(TEST_TARGET test-main-large)
|
| 58 |
+
add_test(NAME ${TEST_TARGET}
|
| 59 |
+
COMMAND $<TARGET_FILE:main>
|
| 60 |
+
-m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-large.bin
|
| 61 |
+
-f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
|
| 62 |
+
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "large")
|
whisper.cpp
CHANGED
|
@@ -950,6 +950,7 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
|
|
| 950 |
|
| 951 |
// load weights
|
| 952 |
{
|
|
|
|
| 953 |
size_t total_size = 0;
|
| 954 |
|
| 955 |
while (true) {
|
|
@@ -1004,9 +1005,17 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
|
|
| 1004 |
|
| 1005 |
//printf("%24s - [%5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);
|
| 1006 |
total_size += ggml_nbytes(tensor);
|
|
|
|
| 1007 |
}
|
| 1008 |
|
| 1009 |
printf("%s: model size = %8.2f MB\n", __func__, total_size/1024.0/1024.0);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1010 |
}
|
| 1011 |
|
| 1012 |
fin.close();
|
|
@@ -1772,8 +1781,6 @@ bool whisper_decode(
|
|
| 1772 |
}
|
| 1773 |
|
| 1774 |
// the most basic sampling scheme - select the top token
|
| 1775 |
-
// TODO: beam search
|
| 1776 |
-
// TODO: temperature
|
| 1777 |
whisper_vocab::id whisper_sample_best(
|
| 1778 |
const whisper_vocab & vocab,
|
| 1779 |
const float * probs, bool need_timestamp) {
|
|
|
|
| 950 |
|
| 951 |
// load weights
|
| 952 |
{
|
| 953 |
+
int n_loaded = 0;
|
| 954 |
size_t total_size = 0;
|
| 955 |
|
| 956 |
while (true) {
|
|
|
|
| 1005 |
|
| 1006 |
//printf("%24s - [%5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);
|
| 1007 |
total_size += ggml_nbytes(tensor);
|
| 1008 |
+
n_loaded++;
|
| 1009 |
}
|
| 1010 |
|
| 1011 |
printf("%s: model size = %8.2f MB\n", __func__, total_size/1024.0/1024.0);
|
| 1012 |
+
|
| 1013 |
+
if (n_loaded == 0) {
|
| 1014 |
+
printf("%s: WARN no tensors loaded from model file - assuming empty model for testing\n", __func__);
|
| 1015 |
+
} else if (n_loaded != model.tensors.size()) {
|
| 1016 |
+
fprintf(stderr, "%s: ERROR not all tensors loaded from model file - expected %zu, got %d\n", __func__, model.tensors.size(), n_loaded);
|
| 1017 |
+
return false;
|
| 1018 |
+
}
|
| 1019 |
}
|
| 1020 |
|
| 1021 |
fin.close();
|
|
|
|
| 1781 |
}
|
| 1782 |
|
| 1783 |
// the most basic sampling scheme - select the top token
|
|
|
|
|
|
|
| 1784 |
whisper_vocab::id whisper_sample_best(
|
| 1785 |
const whisper_vocab & vocab,
|
| 1786 |
const float * probs, bool need_timestamp) {
|
whisper.h
CHANGED
|
@@ -71,11 +71,12 @@ extern "C" {
|
|
| 71 |
// return the id of the specified language, returns -1 if not found
|
| 72 |
WHISPER_API int whisper_lang_id(const char * lang);
|
| 73 |
|
| 74 |
-
WHISPER_API int
|
| 75 |
-
WHISPER_API int
|
| 76 |
-
WHISPER_API int
|
| 77 |
-
WHISPER_API int
|
| 78 |
-
|
|
|
|
| 79 |
|
| 80 |
WHISPER_API const char * whisper_token_to_str(struct whisper_context * ctx, whisper_token token);
|
| 81 |
|
|
|
|
| 71 |
// return the id of the specified language, returns -1 if not found
|
| 72 |
WHISPER_API int whisper_lang_id(const char * lang);
|
| 73 |
|
| 74 |
+
WHISPER_API int whisper_n_len (struct whisper_context * ctx); // mel length
|
| 75 |
+
WHISPER_API int whisper_n_vocab (struct whisper_context * ctx);
|
| 76 |
+
WHISPER_API int whisper_n_text_ctx (struct whisper_context * ctx);
|
| 77 |
+
WHISPER_API int whisper_is_multilingual(struct whisper_context * ctx);
|
| 78 |
+
|
| 79 |
+
WHISPER_API float * whisper_get_probs(struct whisper_context * ctx);
|
| 80 |
|
| 81 |
WHISPER_API const char * whisper_token_to_str(struct whisper_context * ctx, whisper_token token);
|
| 82 |
|