ggerganov committed on
Commit
6113fbd
·
1 Parent(s): db42b8a

Adding sanitizer tests

Browse files
Files changed (5) hide show
  1. .github/workflows/build.yml +3 -3
  2. .gitignore +1 -0
  3. tests/CMakeLists.txt +62 -0
  4. whisper.cpp +9 -2
  5. whisper.h +6 -5
.github/workflows/build.yml CHANGED
@@ -61,7 +61,7 @@ jobs:
61
  - name: Build
62
  run: |
63
  make
64
- ctest --output-on-failure
65
 
66
  ubuntu-latest-clang:
67
  runs-on: ubuntu-latest
@@ -87,7 +87,7 @@ jobs:
87
  - name: Build
88
  run: |
89
  make
90
- ctest --output-on-failure
91
 
92
  ubuntu-latest-gcc-sanitized:
93
  runs-on: ubuntu-latest
@@ -112,4 +112,4 @@ jobs:
112
  - name: Build
113
  run: |
114
  make
115
- ctest --output-on-failure
 
61
  - name: Build
62
  run: |
63
  make
64
+ ctest -L gh --output-on-failure
65
 
66
  ubuntu-latest-clang:
67
  runs-on: ubuntu-latest
 
87
  - name: Build
88
  run: |
89
  make
90
+ ctest -L gh --output-on-failure
91
 
92
  ubuntu-latest-gcc-sanitized:
93
  runs-on: ubuntu-latest
 
112
  - name: Build
113
  run: |
114
  make
115
+ ctest -L gh --output-on-failure
.gitignore CHANGED
@@ -4,3 +4,4 @@ stream
4
  *.o
5
  .cache
6
  build/
 
 
4
  *.o
5
  .cache
6
  build/
7
+ compile_commands.json
tests/CMakeLists.txt CHANGED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ set(TEST_TARGET test-main-tiny)
2
+ add_test(NAME ${TEST_TARGET}
3
+ COMMAND $<TARGET_FILE:main>
4
+ -m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-tiny.bin
5
+ -f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
6
+ set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "tiny;gh")
7
+
8
+ set(TEST_TARGET test-main-tiny.en)
9
+ add_test(NAME ${TEST_TARGET}
10
+ COMMAND $<TARGET_FILE:main>
11
+ -m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-tiny.en.bin
12
+ -f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
13
+ set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "tiny;en;gh")
14
+
15
+ set(TEST_TARGET test-main-base)
16
+ add_test(NAME ${TEST_TARGET}
17
+ COMMAND $<TARGET_FILE:main>
18
+ -m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-base.bin
19
+ -f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
20
+ set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "base")
21
+
22
+ set(TEST_TARGET test-main-base.en)
23
+ add_test(NAME ${TEST_TARGET}
24
+ COMMAND $<TARGET_FILE:main>
25
+ -m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-base.en.bin
26
+ -f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
27
+ set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "base;en")
28
+
29
+ set(TEST_TARGET test-main-small)
30
+ add_test(NAME ${TEST_TARGET}
31
+ COMMAND $<TARGET_FILE:main>
32
+ -m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-small.bin
33
+ -f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
34
+ set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "small")
35
+
36
+ set(TEST_TARGET test-main-small.en)
37
+ add_test(NAME ${TEST_TARGET}
38
+ COMMAND $<TARGET_FILE:main>
39
+ -m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-small.en.bin
40
+ -f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
41
+ set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "small;en")
42
+
43
+ set(TEST_TARGET test-main-medium)
44
+ add_test(NAME ${TEST_TARGET}
45
+ COMMAND $<TARGET_FILE:main>
46
+ -m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-medium.bin
47
+ -f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
48
+ set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "medium")
49
+
50
+ set(TEST_TARGET test-main-medium.en)
51
+ add_test(NAME ${TEST_TARGET}
52
+ COMMAND $<TARGET_FILE:main>
53
+ -m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-medium.en.bin
54
+ -f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
55
+ set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "medium;en")
56
+
57
+ set(TEST_TARGET test-main-large)
58
+ add_test(NAME ${TEST_TARGET}
59
+ COMMAND $<TARGET_FILE:main>
60
+ -m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-large.bin
61
+ -f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
62
+ set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "large")
whisper.cpp CHANGED
@@ -950,6 +950,7 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
950
 
951
  // load weights
952
  {
 
953
  size_t total_size = 0;
954
 
955
  while (true) {
@@ -1004,9 +1005,17 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
1004
 
1005
  //printf("%24s - [%5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);
1006
  total_size += ggml_nbytes(tensor);
 
1007
  }
1008
 
1009
  printf("%s: model size = %8.2f MB\n", __func__, total_size/1024.0/1024.0);
 
 
 
 
 
 
 
1010
  }
1011
 
1012
  fin.close();
@@ -1772,8 +1781,6 @@ bool whisper_decode(
1772
  }
1773
 
1774
  // the most basic sampling scheme - select the top token
1775
- // TODO: beam search
1776
- // TODO: temperature
1777
  whisper_vocab::id whisper_sample_best(
1778
  const whisper_vocab & vocab,
1779
  const float * probs, bool need_timestamp) {
 
950
 
951
  // load weights
952
  {
953
+ int n_loaded = 0;
954
  size_t total_size = 0;
955
 
956
  while (true) {
 
1005
 
1006
  //printf("%24s - [%5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);
1007
  total_size += ggml_nbytes(tensor);
1008
+ n_loaded++;
1009
  }
1010
 
1011
  printf("%s: model size = %8.2f MB\n", __func__, total_size/1024.0/1024.0);
1012
+
1013
+ if (n_loaded == 0) {
1014
+ printf("%s: WARN no tensors loaded from model file - assuming empty model for testing\n", __func__);
1015
+ } else if (n_loaded != model.tensors.size()) {
1016
+ fprintf(stderr, "%s: ERROR not all tensors loaded from model file - expected %zu, got %d\n", __func__, model.tensors.size(), n_loaded);
1017
+ return false;
1018
+ }
1019
  }
1020
 
1021
  fin.close();
 
1781
  }
1782
 
1783
  // the most basic sampling scheme - select the top token
 
 
1784
  whisper_vocab::id whisper_sample_best(
1785
  const whisper_vocab & vocab,
1786
  const float * probs, bool need_timestamp) {
whisper.h CHANGED
@@ -71,11 +71,12 @@ extern "C" {
71
  // return the id of the specified language, returns -1 if not found
72
  WHISPER_API int whisper_lang_id(const char * lang);
73
 
74
- WHISPER_API int whisper_n_len (struct whisper_context * ctx); // mel length
75
- WHISPER_API int whisper_n_vocab (struct whisper_context * ctx);
76
- WHISPER_API int whisper_n_text_ctx (struct whisper_context * ctx);
77
- WHISPER_API int whisper_is_multilingual(struct whisper_context * ctx);
78
- WHISPER_API float * whisper_get_probs (struct whisper_context * ctx);
 
79
 
80
  WHISPER_API const char * whisper_token_to_str(struct whisper_context * ctx, whisper_token token);
81
 
 
71
  // return the id of the specified language, returns -1 if not found
72
  WHISPER_API int whisper_lang_id(const char * lang);
73
 
74
+ WHISPER_API int whisper_n_len (struct whisper_context * ctx); // mel length
75
+ WHISPER_API int whisper_n_vocab (struct whisper_context * ctx);
76
+ WHISPER_API int whisper_n_text_ctx (struct whisper_context * ctx);
77
+ WHISPER_API int whisper_is_multilingual(struct whisper_context * ctx);
78
+
79
+ WHISPER_API float * whisper_get_probs(struct whisper_context * ctx);
80
 
81
  WHISPER_API const char * whisper_token_to_str(struct whisper_context * ctx, whisper_token token);
82