ggerganov committed on
Commit
f79068a
Β·
1 Parent(s): 0cdf85e

refactoring : move main + stream in examples + other stuff

Browse files
.gitignore CHANGED
@@ -1,17 +1,21 @@
1
- sync.sh
2
- main
3
- stream
4
  *.o
5
- .cache
 
 
 
 
6
  build/
7
  build-em/
8
  build-debug/
9
  build-release/
10
- out/
11
- .vs/
12
- .vscode/
 
 
 
 
13
  compile_commands.json
14
- .DS_Store
15
 
16
  examples/whisper.objc/whisper.objc.xcodeproj/xcshareddata
17
  examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/
 
 
 
 
1
  *.o
2
+ .cache/
3
+ .vs/
4
+ .vscode/
5
+ .DS_Store
6
+
7
  build/
8
  build-em/
9
  build-debug/
10
  build-release/
11
+ build-sanitize-addr/
12
+ build-sanitize-thread/
13
+
14
+ main
15
+ stream
16
+ bench
17
+ sync.sh
18
  compile_commands.json
 
19
 
20
  examples/whisper.objc/whisper.objc.xcodeproj/xcshareddata
21
  examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/
CMakeLists.txt CHANGED
@@ -48,7 +48,7 @@ option(WHISPER_NO_ACCELERATE "whisper: disable Accelerate framework" O
48
 
49
  if (NOT MSVC)
50
  if (WHISPER_SANITIZE_THREAD)
51
- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=thread")
52
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
53
  endif()
54
 
@@ -133,7 +133,9 @@ else()
133
  endif()
134
  endif()
135
 
 
136
  # whisper - this is the main library of the project
 
137
 
138
  set(TARGET whisper)
139
 
@@ -167,40 +169,17 @@ install(TARGETS ${TARGET}
167
  ARCHIVE DESTINATION lib/static
168
  )
169
 
 
170
  # bindings
 
171
 
172
  add_subdirectory(bindings)
173
 
 
174
  # programs, examples and tests
 
175
 
176
  if (WHISPER_STANDALONE)
177
- if (NOT EMSCRIPTEN)
178
- # TODO: move to examples
179
- # main
180
- set(TARGET main)
181
- add_executable(${TARGET} main.cpp)
182
- target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT})
183
-
184
- # TODO: move to examples
185
- if (WHISPER_SUPPORT_SDL2)
186
- if (WHISPER_SUPPORT_SDL2)
187
- # SDL2
188
- find_package(SDL2 REQUIRED)
189
-
190
- string(STRIP "${SDL2_LIBRARIES}" SDL2_LIBRARIES)
191
-
192
- message(STATUS "SDL2_INCLUDE_DIRS = ${SDL2_INCLUDE_DIRS}")
193
- message(STATUS "SDL2_LIBRARIES = ${SDL2_LIBRARIES}")
194
- endif()
195
-
196
- # stream
197
- set(TARGET stream)
198
- add_executable(${TARGET} stream.cpp)
199
- target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
200
- target_link_libraries(${TARGET} PRIVATE whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
201
- endif ()
202
- endif()
203
-
204
  if (WHISPER_BUILD_TESTS)
205
  enable_testing()
206
  add_subdirectory(tests)
 
48
 
49
  if (NOT MSVC)
50
  if (WHISPER_SANITIZE_THREAD)
51
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=thread")
52
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
53
  endif()
54
 
 
133
  endif()
134
  endif()
135
 
136
+ #
137
  # whisper - this is the main library of the project
138
+ #
139
 
140
  set(TARGET whisper)
141
 
 
169
  ARCHIVE DESTINATION lib/static
170
  )
171
 
172
+ #
173
  # bindings
174
+ #
175
 
176
  add_subdirectory(bindings)
177
 
178
+ #
179
  # programs, examples and tests
180
+ #
181
 
182
  if (WHISPER_STANDALONE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  if (WHISPER_BUILD_TESTS)
184
  enable_testing()
185
  add_subdirectory(tests)
Makefile CHANGED
@@ -19,13 +19,10 @@ endif
19
  # Compile flags
20
  #
21
 
22
- CFLAGS = -O3 -std=c11
23
- CXXFLAGS = -O3 -std=c++11
24
  LDFLAGS =
25
 
26
- CFLAGS += -Wall -Wextra -Wno-unused-parameter -Wno-unused-function
27
- CXXFLAGS += -Wall -Wextra -Wno-unused-parameter -Wno-unused-function
28
-
29
  # OS specific
30
  # TODO: support Windows
31
  ifeq ($(UNAME_S),Linux)
@@ -76,8 +73,8 @@ endif
76
  # Build library + main
77
  #
78
 
79
- main: main.cpp ggml.o whisper.o
80
- $(CXX) $(CXXFLAGS) main.cpp whisper.o ggml.o -o main $(LDFLAGS)
81
  ./main -h
82
 
83
  ggml.o: ggml.c ggml.h
@@ -90,7 +87,7 @@ libwhisper.a: ggml.o whisper.o
90
  ar rcs libwhisper.a ggml.o whisper.o
91
 
92
  clean:
93
- rm -f *.o main stream libwhisper.a
94
 
95
  #
96
  # Examples
@@ -98,8 +95,11 @@ clean:
98
 
99
  CC_SDL=`sdl2-config --cflags --libs`
100
 
101
- stream: stream.cpp ggml.o whisper.o
102
- $(CXX) $(CXXFLAGS) stream.cpp ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)
 
 
 
103
 
104
  #
105
  # Audio samples
@@ -139,7 +139,7 @@ samples:
139
  .PHONY: large
140
 
141
  tiny.en tiny base.en base small.en small medium.en medium large: main
142
- bash ./download-ggml-model.sh $@
143
  @echo ""
144
  @echo "==============================================="
145
  @echo "Running $@ on all samples in ./samples ..."
 
19
  # Compile flags
20
  #
21
 
22
+ CFLAGS = -I. -O3 -std=c11
23
+ CXXFLAGS = -I. -I./examples -O3 -std=c++11
24
  LDFLAGS =
25
 
 
 
 
26
  # OS specific
27
  # TODO: support Windows
28
  ifeq ($(UNAME_S),Linux)
 
73
  # Build library + main
74
  #
75
 
76
+ main: examples/main/main.cpp ggml.o whisper.o
77
+ $(CXX) $(CXXFLAGS) examples/main/main.cpp whisper.o ggml.o -o main $(LDFLAGS)
78
  ./main -h
79
 
80
  ggml.o: ggml.c ggml.h
 
87
  ar rcs libwhisper.a ggml.o whisper.o
88
 
89
  clean:
90
+ rm -f *.o main stream bench libwhisper.a
91
 
92
  #
93
  # Examples
 
95
 
96
  CC_SDL=`sdl2-config --cflags --libs`
97
 
98
+ stream: examples/stream/stream.cpp ggml.o whisper.o
99
+ $(CXX) $(CXXFLAGS) examples/stream/stream.cpp ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)
100
+
101
+ bench: examples/bench/bench.cpp ggml.o whisper.o
102
+ $(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o whisper.o -o bench $(LDFLAGS)
103
 
104
  #
105
  # Audio samples
 
139
  .PHONY: large
140
 
141
  tiny.en tiny base.en base small.en small medium.en medium large: main
142
+ bash ./models/download-ggml-model.sh $@
143
  @echo ""
144
  @echo "==============================================="
145
  @echo "Running $@ on all samples in ./samples ..."
README.md CHANGED
@@ -24,23 +24,32 @@ Supported platforms:
24
  - [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/issues/7)
25
  - [x] [Android](https://github.com/ggerganov/whisper.cpp/issues/30)
26
 
 
 
 
 
 
27
  Having such a lightweight implementation of the model makes it easy to integrate it into different platforms and applications.
28
  As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device:
29
 
30
  https://user-images.githubusercontent.com/1991296/197385372-962a6dea-bca1-4d50-bf96-1d8c27b98c81.mp4
31
 
32
- ## Usage
33
 
34
- To build the main program, run `make`. You can then transcribe a `.wav` file like this:
35
 
36
  ```bash
37
- ./main -f input.wav
38
  ```
39
 
40
- Before running the program, make sure to download one of the ggml Whisper models. For example:
41
 
42
  ```bash
43
- bash ./download-ggml-model.sh base.en
 
 
 
 
44
  ```
45
 
46
  ---
@@ -73,7 +82,7 @@ options:
73
  -m FNAME, --model FNAME model path (default: models/ggml-base.en.bin)
74
  -f FNAME, --file FNAME input WAV file path
75
 
76
- bash ./download-ggml-model.sh base.en
77
  Downloading ggml model base.en ...
78
  models/ggml-base.en.bin 100%[=============================================>] 141.11M 3.13MB/s in 79s
79
  Done! Model 'base.en' saved in 'models/ggml-base.en.bin'
@@ -232,7 +241,7 @@ whisper_print_timings: total time = 33686.27 ms
232
  ## Real-time audio input example
233
 
234
  This is a naive example of performing real-time inference on audio from your microphone.
235
- The `stream` tool samples the audio every half a second and runs the transcription continuously.
236
  More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
237
 
238
  ```java
@@ -241,7 +250,7 @@ More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/i
241
 
242
  https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4
243
 
244
- The `stream` tool depends on SDL2 library to capture audio from the microphone. You can build it like this:
245
 
246
  ```bash
247
  # Install SDL2 on Linux
@@ -264,8 +273,9 @@ to highlight words with high or low confidence:
264
 
265
  - The core tensor operations are implemented in C ([ggml.h](ggml.h) / [ggml.c](ggml.c))
266
  - The high-level C-style API is implemented in C++ ([whisper.h](whisper.h) / [whisper.cpp](whisper.cpp))
267
- - Simple usage is demonstrated in [main.cpp](main.cpp)
268
- - Sample real-time audio transcription from the microphone is demonstrated in [stream.cpp](stream.cpp)
 
269
 
270
  The tensor operators are optimized heavily for Apple silicon CPUs. Depending on the computation size, Arm Neon SIMD
271
  instrisics or CBLAS Accelerate framework routines are used. The latter are especially effective for bigger sizes since
@@ -279,11 +289,11 @@ the Accelerate framework utilizes the special-purpose AMX coprocessor available
279
  This should be similar to the [GreedyDecoder](https://github.com/openai/whisper/blob/main/whisper/decoding.py#L249-L274)
280
  from the original python implementation, so in order to make a fair comparison between the 2 implementations, make sure
281
  to run the python code with the following parameters:
282
-
283
  ```
284
  whisper --best_of None --beam_size None ...
285
  ```
286
-
287
  In the future, `whisper.cpp` will support more sampling strategies.
288
 
289
  ## Memory usage
@@ -306,7 +316,7 @@ The original models are converted to a custom binary format. This allows to pack
306
  - vocabulary
307
  - weights
308
 
309
- You can download the converted models using the [download-ggml-model.sh](download-ggml-model.sh) script or from here:
310
 
311
  https://ggml.ggerganov.com
312
 
 
24
  - [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/issues/7)
25
  - [x] [Android](https://github.com/ggerganov/whisper.cpp/issues/30)
26
 
27
+ The entire implementation of the model is contained in 2 source files:
28
+
29
+ - [ggml.h](ggml.h) / [ggml.c](ggml.c)
30
+ - [whisper.h](whisper.h) / [whisper.cpp](whisper.cpp)
31
+
32
  Having such a lightweight implementation of the model makes it easy to integrate it into different platforms and applications.
33
  As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device:
34
 
35
  https://user-images.githubusercontent.com/1991296/197385372-962a6dea-bca1-4d50-bf96-1d8c27b98c81.mp4
36
 
37
+ ## Quick start
38
 
39
+ First, download one of the Whisper models converted in [ggml format](models). For example:
40
 
41
  ```bash
42
+ bash ./models/download-ggml-model.sh base.en
43
  ```
44
 
45
+ Now build the [main](examples/main) example and transcribe an audio file like this:
46
 
47
  ```bash
48
+ # build the main example
49
+ make
50
+
51
+ # transcribe an audio file
52
+ ./main -f input.wav
53
  ```
54
 
55
  ---
 
82
  -m FNAME, --model FNAME model path (default: models/ggml-base.en.bin)
83
  -f FNAME, --file FNAME input WAV file path
84
 
85
+ bash ./models/download-ggml-model.sh base.en
86
  Downloading ggml model base.en ...
87
  models/ggml-base.en.bin 100%[=============================================>] 141.11M 3.13MB/s in 79s
88
  Done! Model 'base.en' saved in 'models/ggml-base.en.bin'
 
241
  ## Real-time audio input example
242
 
243
  This is a naive example of performing real-time inference on audio from your microphone.
244
+ The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously.
245
  More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
246
 
247
  ```java
 
250
 
251
  https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4
252
 
253
+ The [stream](examples/stream) tool depends on SDL2 library to capture audio from the microphone. You can build it like this:
254
 
255
  ```bash
256
  # Install SDL2 on Linux
 
273
 
274
  - The core tensor operations are implemented in C ([ggml.h](ggml.h) / [ggml.c](ggml.c))
275
  - The high-level C-style API is implemented in C++ ([whisper.h](whisper.h) / [whisper.cpp](whisper.cpp))
276
+ - Sample usage is demonstrated in [main.cpp](examples/main)
277
+ - Sample real-time audio transcription from the microphone is demonstrated in [stream.cpp](examples/stream)
278
+ - Various other examples are available in the [examples](examples) folder
279
 
280
  The tensor operators are optimized heavily for Apple silicon CPUs. Depending on the computation size, Arm Neon SIMD
281
  instrisics or CBLAS Accelerate framework routines are used. The latter are especially effective for bigger sizes since
 
289
  This should be similar to the [GreedyDecoder](https://github.com/openai/whisper/blob/main/whisper/decoding.py#L249-L274)
290
  from the original python implementation, so in order to make a fair comparison between the 2 implementations, make sure
291
  to run the python code with the following parameters:
292
+
293
  ```
294
  whisper --best_of None --beam_size None ...
295
  ```
296
+
297
  In the future, `whisper.cpp` will support more sampling strategies.
298
 
299
  ## Memory usage
 
316
  - vocabulary
317
  - weights
318
 
319
+ You can download the converted models using the [models/download-ggml-model.sh](models/download-ggml-model.sh) script or from here:
320
 
321
  https://ggml.ggerganov.com
322
 
examples/CMakeLists.txt CHANGED
@@ -4,11 +4,24 @@ find_package(Threads REQUIRED)
4
 
5
  # third-party
6
 
7
- #add_subdirectory(third-party)
 
 
 
 
 
 
 
 
8
 
9
  # examples
10
 
 
 
11
  if (EMSCRIPTEN)
12
  add_subdirectory(whisper.wasm)
13
  else()
 
 
 
14
  endif()
 
4
 
5
  # third-party
6
 
7
+ if (WHISPER_SUPPORT_SDL2)
8
+ # SDL2
9
+ find_package(SDL2 REQUIRED)
10
+
11
+ string(STRIP "${SDL2_LIBRARIES}" SDL2_LIBRARIES)
12
+
13
+ message(STATUS "SDL2_INCLUDE_DIRS = ${SDL2_INCLUDE_DIRS}")
14
+ message(STATUS "SDL2_LIBRARIES = ${SDL2_LIBRARIES}")
15
+ endif()
16
 
17
  # examples
18
 
19
+ include_directories(${CMAKE_CURRENT_SOURCE_DIR})
20
+
21
  if (EMSCRIPTEN)
22
  add_subdirectory(whisper.wasm)
23
  else()
24
+ add_subdirectory(main)
25
+ add_subdirectory(stream)
26
+ add_subdirectory(bench)
27
  endif()
examples/bench/CMakeLists.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ set(TARGET bench)
2
+ add_executable(${TARGET} bench.cpp)
3
+ target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT})
examples/bench/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # bench
2
+
3
+ TODO
examples/bench/bench.cpp ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "whisper.h"
2
+
3
+ #include <cstdio>
4
+ #include <string>
5
+ #include <thread>
6
+
7
+ // command-line parameters
8
+ struct whisper_params {
9
+ int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
10
+
11
+ std::string model = "models/ggml-base.en.bin";
12
+ };
13
+
14
+ void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
15
+
16
+ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
17
+ for (int i = 1; i < argc; i++) {
18
+ std::string arg = argv[i];
19
+
20
+ if (arg == "-t" || arg == "--threads") {
21
+ params.n_threads = std::stoi(argv[++i]);
22
+ } else if (arg == "-m" || arg == "--model") {
23
+ params.model = argv[++i];
24
+ } else if (arg == "-h" || arg == "--help") {
25
+ whisper_print_usage(argc, argv, params);
26
+ exit(0);
27
+ } else {
28
+ fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
29
+ whisper_print_usage(argc, argv, params);
30
+ exit(0);
31
+ }
32
+ }
33
+
34
+ return true;
35
+ }
36
+
37
+ void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
38
+ fprintf(stderr, "\n");
39
+ fprintf(stderr, "usage: %s [options]\n", argv[0]);
40
+ fprintf(stderr, "\n");
41
+ fprintf(stderr, "options:\n");
42
+ fprintf(stderr, " -h, --help show this help message and exit\n");
43
+ fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
44
+ fprintf(stderr, " -m FNAME, --model FNAME model path (default: %s)\n", params.model.c_str());
45
+ fprintf(stderr, "\n");
46
+ }
47
+
48
+ int main(int argc, char ** argv) {
49
+ whisper_params params;
50
+
51
+ if (whisper_params_parse(argc, argv, params) == false) {
52
+ return 1;
53
+ }
54
+
55
+ // whisper init
56
+
57
+ struct whisper_context * ctx = whisper_init(params.model.c_str());
58
+
59
+ if (ctx == nullptr) {
60
+ fprintf(stderr, "error: failed to initialize whisper context\n");
61
+ return 2;
62
+ }
63
+
64
+ if (int ret = whisper_set_mel(ctx, nullptr, 0, WHISPER_N_MEL)) {
65
+ fprintf(stderr, "error: failed to set mel: %d\n", ret);
66
+ return 3;
67
+ }
68
+
69
+ if (int ret = whisper_encode(ctx, 0, params.n_threads) != 0) {
70
+ fprintf(stderr, "error: failed to encode model: %d\n", ret);
71
+ return 4;
72
+ }
73
+
74
+ whisper_print_timings(ctx);
75
+ whisper_free(ctx);
76
+
77
+ return 0;
78
+ }
dr_wav.h β†’ examples/dr_wav.h RENAMED
File without changes
examples/main/CMakeLists.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ set(TARGET main)
2
+ add_executable(${TARGET} main.cpp)
3
+ target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT})
examples/main/README.md ADDED
File without changes
main.cpp β†’ examples/main/main.cpp RENAMED
@@ -290,6 +290,11 @@ int main(int argc, char ** argv) {
290
 
291
  struct whisper_context * ctx = whisper_init(params.model.c_str());
292
 
 
 
 
 
 
293
  for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
294
  const auto fname_inp = params.fname_inp[f];
295
 
@@ -300,22 +305,22 @@ int main(int argc, char ** argv) {
300
  if (!drwav_init_file(&wav, fname_inp.c_str(), NULL)) {
301
  fprintf(stderr, "%s: failed to open WAV file '%s' - check your input\n", argv[0], fname_inp.c_str());
302
  whisper_print_usage(argc, argv, {});
303
- return 3;
304
  }
305
 
306
  if (wav.channels != 1 && wav.channels != 2) {
307
  fprintf(stderr, "%s: WAV file '%s' must be mono or stereo\n", argv[0], fname_inp.c_str());
308
- return 4;
309
  }
310
 
311
  if (wav.sampleRate != WHISPER_SAMPLE_RATE) {
312
  fprintf(stderr, "%s: WAV file '%s' must be 16 kHz\n", argv[0], fname_inp.c_str());
313
- return 5;
314
  }
315
 
316
  if (wav.bitsPerSample != 16) {
317
  fprintf(stderr, "%s: WAV file '%s' must be 16-bit\n", argv[0], fname_inp.c_str());
318
- return 6;
319
  }
320
 
321
  int n = wav.totalPCMFrameCount;
@@ -379,7 +384,7 @@ int main(int argc, char ** argv) {
379
 
380
  if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
381
  fprintf(stderr, "%s: failed to process audio\n", argv[0]);
382
- return 7;
383
  }
384
 
385
  printf("\n");
 
290
 
291
  struct whisper_context * ctx = whisper_init(params.model.c_str());
292
 
293
+ if (ctx == nullptr) {
294
+ fprintf(stderr, "error: failed to initialize whisper context\n");
295
+ return 3;
296
+ }
297
+
298
  for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
299
  const auto fname_inp = params.fname_inp[f];
300
 
 
305
  if (!drwav_init_file(&wav, fname_inp.c_str(), NULL)) {
306
  fprintf(stderr, "%s: failed to open WAV file '%s' - check your input\n", argv[0], fname_inp.c_str());
307
  whisper_print_usage(argc, argv, {});
308
+ return 4;
309
  }
310
 
311
  if (wav.channels != 1 && wav.channels != 2) {
312
  fprintf(stderr, "%s: WAV file '%s' must be mono or stereo\n", argv[0], fname_inp.c_str());
313
+ return 5;
314
  }
315
 
316
  if (wav.sampleRate != WHISPER_SAMPLE_RATE) {
317
  fprintf(stderr, "%s: WAV file '%s' must be 16 kHz\n", argv[0], fname_inp.c_str());
318
+ return 6;
319
  }
320
 
321
  if (wav.bitsPerSample != 16) {
322
  fprintf(stderr, "%s: WAV file '%s' must be 16-bit\n", argv[0], fname_inp.c_str());
323
+ return 7;
324
  }
325
 
326
  int n = wav.totalPCMFrameCount;
 
384
 
385
  if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
386
  fprintf(stderr, "%s: failed to process audio\n", argv[0]);
387
+ return 8;
388
  }
389
 
390
  printf("\n");
examples/stream/CMakeLists.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ if (WHISPER_SUPPORT_SDL2)
2
+ # stream
3
+ set(TARGET stream)
4
+ add_executable(${TARGET} stream.cpp)
5
+ target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
6
+ target_link_libraries(${TARGET} PRIVATE whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
7
+ endif ()
examples/stream/README.md ADDED
File without changes
stream.cpp β†’ examples/stream/stream.cpp RENAMED
File without changes
ggml.c CHANGED
@@ -15,10 +15,39 @@
15
  #include <stdio.h>
16
 
17
  #if defined _MSC_VER
18
- #include "msvc_thread_atomic.h"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  #else
20
  #include <pthread.h>
21
  #include <stdatomic.h>
 
22
  typedef void* thread_ret_t;
23
  #endif
24
 
 
15
  #include <stdio.h>
16
 
17
  #if defined _MSC_VER
18
+ #include <Windows.h>
19
+
20
+ typedef volatile LONG atomic_int;
21
+ typedef atomic_int atomic_bool;
22
+
23
+ static void atomic_store(atomic_int* ptr, LONG val) {
24
+ InterlockedExchange(ptr, val);
25
+ }
26
+ static LONG atomic_load(atomic_int* ptr) {
27
+ return InterlockedCompareExchange(ptr, 0, 0);
28
+ }
29
+ static LONG atomic_fetch_add(atomic_int* ptr, LONG inc) {
30
+ return InterlockedExchangeAdd(ptr, inc);
31
+ }
32
+ static LONG atomic_fetch_sub(atomic_int* ptr, LONG dec) {
33
+ return atomic_fetch_add(ptr, -(dec));
34
+ }
35
+
36
+ typedef HANDLE pthread_t;
37
+
38
+ typedef DWORD thread_ret_t;
39
+ static int pthread_create(pthread_t* out, void* unused, thread_ret_t(*func)(void*), void* arg) {
40
+ out = CreateThread(NULL, 0, func, arg, 0, NULL);
41
+ return out != NULL;
42
+ }
43
+
44
+ static int pthread_join(pthread_t thread, void* unused) {
45
+ return (int) WaitForSingleObject(thread, INFINITE);
46
+ }
47
  #else
48
  #include <pthread.h>
49
  #include <stdatomic.h>
50
+
51
  typedef void* thread_ret_t;
52
  #endif
53
 
download-ggml-model.sh β†’ models/download-ggml-model.sh RENAMED
@@ -3,7 +3,7 @@
3
  # This script downloads Whisper model files that have already been converted to ggml format.
4
  # This way you don't have to convert them yourself.
5
 
6
- ggml_path=$(dirname $(realpath $0))
7
 
8
  # Whisper models
9
  models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" )
@@ -38,14 +38,14 @@ fi
38
 
39
  printf "Downloading ggml model $model ...\n"
40
 
41
- mkdir -p models
42
 
43
- if [ -f "models/ggml-$model.bin" ]; then
44
  printf "Model $model already exists. Skipping download.\n"
45
  exit 0
46
  fi
47
 
48
- wget --quiet --show-progress -O models/ggml-$model.bin https://ggml.ggerganov.com/ggml-model-whisper-$model.bin
49
 
50
  if [ $? -ne 0 ]; then
51
  printf "Failed to download ggml model $model \n"
 
3
  # This script downloads Whisper model files that have already been converted to ggml format.
4
  # This way you don't have to convert them yourself.
5
 
6
+ models_path=$(dirname $(realpath $0))
7
 
8
  # Whisper models
9
  models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" )
 
38
 
39
  printf "Downloading ggml model $model ...\n"
40
 
41
+ cd $models_path
42
 
43
+ if [ -f "ggml-$model.bin" ]; then
44
  printf "Model $model already exists. Skipping download.\n"
45
  exit 0
46
  fi
47
 
48
+ wget --quiet --show-progress -O ggml-$model.bin https://ggml.ggerganov.com/ggml-model-whisper-$model.bin
49
 
50
  if [ $? -ne 0 ]; then
51
  printf "Failed to download ggml model $model \n"
msvc_thread_atomic.h DELETED
@@ -1,31 +0,0 @@
1
- #pragma once
2
- #include <Windows.h>
3
-
4
- typedef volatile LONG atomic_int;
5
- typedef atomic_int atomic_bool;
6
-
7
- static void atomic_store(atomic_int* ptr, LONG val) {
8
- InterlockedExchange(ptr, val);
9
- }
10
- static LONG atomic_load(atomic_int* ptr) {
11
- return InterlockedCompareExchange(ptr, 0, 0);
12
- }
13
- static LONG atomic_fetch_add(atomic_int* ptr, LONG inc) {
14
- return InterlockedExchangeAdd(ptr, inc);
15
- }
16
- static LONG atomic_fetch_sub(atomic_int* ptr, LONG dec) {
17
- return atomic_fetch_add(ptr, -(dec));
18
- }
19
-
20
- typedef HANDLE pthread_t;
21
-
22
- typedef DWORD thread_ret_t;
23
- static int pthread_create(pthread_t* out, void* unused, thread_ret_t(*func)(void*), void* arg) {
24
- out = CreateThread(NULL, 0, func, arg, 0, NULL);
25
- return out != NULL;
26
- }
27
-
28
- static int pthread_join(pthread_t thread, void* unused) {
29
- return (int) WaitForSingleObject(thread, INFINITE);
30
- }
31
-