Todd ggerganov committed on
Commit
ad05850
·
unverified ·
1 Parent(s): 3b6ce25

bindings : add Ruby (#500)

Browse files

* adding ruby bindings

* avoid adding these they are copied in via extconf.rb

* ignore these files here

* add definitions for boolean params

* initial transcribe for ruby

* use en model and transcribe jfk with assertion

* possibly this works for building ruby binding

* ci : try to add ruby workflow

---------

Co-authored-by: Georgi Gerganov <[email protected]>

.github/workflows/{bindings.yml → bindings-go.yml} RENAMED
@@ -1,4 +1,4 @@
1
- name: Bindings Tests
2
  on:
3
  push:
4
  paths:
 
1
+ name: Bindings Tests (Go)
2
  on:
3
  push:
4
  paths:
.github/workflows/bindings-ruby.yml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Builds the Ruby bindings on pushes and pull requests that touch the
# bindings themselves or the public whisper.h header.
name: Bindings Tests (Ruby)
on:
  push:
    paths:
      - bindings/ruby/**
      - whisper.h
  pull_request:
    paths:
      - bindings/ruby/**
      - whisper.h

jobs:
  ubuntu-latest:
    runs-on: ubuntu-latest
    steps:
      - uses: ruby/setup-ruby@v1
        with:
          ruby-version: '3.0'
      - uses: actions/checkout@v1
      # extconf.rb copies the whisper sources next to the extension and
      # generates the Makefile; make then compiles the native extension.
      - run: |
          cd bindings/ruby/ext
          ruby extconf.rb && make
bindings/ruby/ext/.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ Makefile
2
+ ggml.c
3
+ ggml.h
4
+ whisper.bundle
5
+ whisper.cpp
6
+ whisper.h
7
+ dr_wav.h
bindings/ruby/ext/extconf.rb ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
require 'mkmf'
require 'fileutils'

# The extension is compiled together with the whisper sources, so copy them
# from the repository root (three directories up) into the current build
# directory. FileUtils.cp raises if a source file is missing, which stops the
# build here instead of failing later with a confusing compiler error.
repo_root = File.join(File.dirname(__FILE__), '..', '..', '..')
[
  %w[whisper.cpp],
  %w[whisper.h],
  %w[ggml.h],
  %w[ggml.c],
  %w[examples dr_wav.h],
].each do |relative_path|
  FileUtils.cp(File.join(repo_root, *relative_path), '.')
end

# need to use c++ compiler flags
$CXXFLAGS << ' -std=c++11'

# Set to true when building binary gems
if enable_config('static-stdlib', false)
  $LDFLAGS << ' -static-libgcc -static-libstdc++'
end

# Opt-in CPU-specific tuning; off by default.
if enable_config('march-tune-native', false)
  $CFLAGS << ' -march=native -mtune=native'
  $CXXFLAGS << ' -march=native -mtune=native'
end

create_makefile('whisper')
bindings/ruby/ext/ruby_whisper.cpp ADDED
@@ -0,0 +1,426 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <ruby.h>
2
+ #include "ruby_whisper.h"
3
+ #define DR_WAV_IMPLEMENTATION
4
+ #include "dr_wav.h"
5
+ #include <cmath>
6
+ #include <fstream>
7
+ #include <cstdio>
8
+ #include <string>
9
+ #include <thread>
10
+ #include <vector>
11
+
12
+ #ifdef __cplusplus
13
+ extern "C" {
14
+ #endif
15
+
16
// Complete body of a boolean attribute writer on Whisper::Params.
// Stores the Ruby truthiness of `value` (false and nil are false, anything
// else is true) in rwp->params.<prop> and returns `value`.
// Expands to several statements including the `return`, so it must be the
// only content of the enclosing function.
#define BOOL_PARAMS_SETTER(self, prop, value) \
  ruby_whisper_params *rwp; \
  Data_Get_Struct(self, ruby_whisper_params, rwp); \
  rwp->params.prop = RTEST(value) ? true : false; \
  return value;

// Complete body of a boolean attribute reader on Whisper::Params.
// Returns Qtrue or Qfalse according to rwp->params.<prop>.
#define BOOL_PARAMS_GETTER(self, prop) \
  ruby_whisper_params *rwp; \
  Data_Get_Struct(self, ruby_whisper_params, rwp); \
  return rwp->params.prop ? Qtrue : Qfalse;
34
+
35
+ VALUE mWhisper;
36
+ VALUE cContext;
37
+ VALUE cParams;
38
+
39
+ static void ruby_whisper_free(ruby_whisper *rw) {
40
+ if (rw->context) {
41
+ whisper_free(rw->context);
42
+ rw->context = NULL;
43
+ }
44
+ }
45
+ static void ruby_whisper_params_free(ruby_whisper_params *rwp) {
46
+ }
47
+
48
+ void rb_whisper_mark(ruby_whisper *rw) {
49
+ // call rb_gc_mark on any ruby references in rw
50
+ }
51
+
52
+ void rb_whisper_free(ruby_whisper *rw) {
53
+ ruby_whisper_free(rw);
54
+ free(rw);
55
+ }
56
+
57
+ void rb_whisper_params_mark(ruby_whisper_params *rwp) {
58
+ }
59
+
60
+ void rb_whisper_params_free(ruby_whisper_params *rwp) {
61
+ ruby_whisper_params_free(rwp);
62
+ free(rwp);
63
+ }
64
+
65
+ static VALUE ruby_whisper_allocate(VALUE klass) {
66
+ ruby_whisper *rw;
67
+ rw = ALLOC(ruby_whisper);
68
+ rw->context = NULL;
69
+ return Data_Wrap_Struct(klass, rb_whisper_mark, rb_whisper_free, rw);
70
+ }
71
+
72
+ static VALUE ruby_whisper_params_allocate(VALUE klass) {
73
+ ruby_whisper_params *rwp;
74
+ rwp = ALLOC(ruby_whisper_params);
75
+ rwp->params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
76
+ return Data_Wrap_Struct(klass, rb_whisper_params_mark, rb_whisper_params_free, rwp);
77
+ }
78
+
79
+ static VALUE ruby_whisper_initialize(int argc, VALUE *argv, VALUE self) {
80
+ ruby_whisper *rw;
81
+ VALUE whisper_model_file_path;
82
+
83
+ // TODO: we can support init from buffer here too maybe another ruby object to expose
84
+ rb_scan_args(argc, argv, "01", &whisper_model_file_path);
85
+ Data_Get_Struct(self, ruby_whisper, rw);
86
+
87
+ if (!rb_respond_to(whisper_model_file_path, rb_intern("to_s"))) {
88
+ rb_raise(rb_eRuntimeError, "Expected file path to model to initialize Whisper::Context");
89
+ }
90
+ rw->context = whisper_init_from_file(StringValueCStr(whisper_model_file_path));
91
+ if (rw->context == nullptr) {
92
+ rb_raise(rb_eRuntimeError, "error: failed to initialize whisper context");
93
+ }
94
+ return self;
95
+ }
96
+
97
+ /*
98
+ * transcribe a single file
99
+ * can emit to a block results
100
+ *
101
+ **/
102
+ static VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
103
+ ruby_whisper *rw;
104
+ ruby_whisper_params *rwp;
105
+ VALUE wave_file_path, blk, params;
106
+
107
+ rb_scan_args(argc, argv, "02&", &wave_file_path, &params, &blk);
108
+ Data_Get_Struct(self, ruby_whisper, rw);
109
+ Data_Get_Struct(params, ruby_whisper_params, rwp);
110
+
111
+ if (!rb_respond_to(wave_file_path, rb_intern("to_s"))) {
112
+ rb_raise(rb_eRuntimeError, "Expected file path to wave file");
113
+ }
114
+
115
+ std::string fname_inp = StringValueCStr(wave_file_path);
116
+
117
+ std::vector<float> pcmf32; // mono-channel F32 PCM
118
+ std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
119
+
120
+ // WAV input - this is directly from main.cpp example
121
+ {
122
+ drwav wav;
123
+ std::vector<uint8_t> wav_data; // used for pipe input from stdin
124
+
125
+ if (fname_inp == "-") {
126
+ {
127
+ uint8_t buf[1024];
128
+ while (true) {
129
+ const size_t n = fread(buf, 1, sizeof(buf), stdin);
130
+ if (n == 0) {
131
+ break;
132
+ }
133
+ wav_data.insert(wav_data.end(), buf, buf + n);
134
+ }
135
+ }
136
+
137
+ if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
138
+ fprintf(stderr, "error: failed to open WAV file from stdin\n");
139
+ return self;
140
+ }
141
+
142
+ fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size());
143
+ } else if (drwav_init_file(&wav, fname_inp.c_str(), nullptr) == false) {
144
+ fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
145
+ return self;
146
+ }
147
+
148
+ if (wav.channels != 1 && wav.channels != 2) {
149
+ fprintf(stderr, "WAV file '%s' must be mono or stereo\n", fname_inp.c_str());
150
+ return self;
151
+ }
152
+
153
+ if (rwp->diarize && wav.channels != 2 && rwp->params.print_timestamps == false) {
154
+ fprintf(stderr, "WAV file '%s' must be stereo for diarization and timestamps have to be enabled\n", fname_inp.c_str());
155
+ return self;
156
+ }
157
+
158
+ if (wav.sampleRate != WHISPER_SAMPLE_RATE) {
159
+ fprintf(stderr, "WAV file '%s' must be %i kHz\n", fname_inp.c_str(), WHISPER_SAMPLE_RATE/1000);
160
+ return self;
161
+ }
162
+
163
+ if (wav.bitsPerSample != 16) {
164
+ fprintf(stderr, "WAV file '%s' must be 16-bit\n", fname_inp.c_str());
165
+ return self;
166
+ }
167
+
168
+ const uint64_t n = wav_data.empty() ? wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8);
169
+
170
+ std::vector<int16_t> pcm16;
171
+ pcm16.resize(n*wav.channels);
172
+ drwav_read_pcm_frames_s16(&wav, n, pcm16.data());
173
+ drwav_uninit(&wav);
174
+
175
+ // convert to mono, float
176
+ pcmf32.resize(n);
177
+ if (wav.channels == 1) {
178
+ for (uint64_t i = 0; i < n; i++) {
179
+ pcmf32[i] = float(pcm16[i])/32768.0f;
180
+ }
181
+ } else {
182
+ for (uint64_t i = 0; i < n; i++) {
183
+ pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
184
+ }
185
+ }
186
+
187
+ if (rwp->diarize) {
188
+ // convert to stereo, float
189
+ pcmf32s.resize(2);
190
+
191
+ pcmf32s[0].resize(n);
192
+ pcmf32s[1].resize(n);
193
+ for (uint64_t i = 0; i < n; i++) {
194
+ pcmf32s[0][i] = float(pcm16[2*i])/32768.0f;
195
+ pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f;
196
+ }
197
+ }
198
+ }
199
+ {
200
+ static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
201
+
202
+ rwp->params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, void * user_data) {
203
+ bool is_aborted = *(bool*)user_data;
204
+ return !is_aborted;
205
+ };
206
+ rwp->params.encoder_begin_callback_user_data = &is_aborted;
207
+ }
208
+
209
+ if (whisper_full_parallel(rw->context, rwp->params, pcmf32.data(), pcmf32.size(), 1) != 0) {
210
+ fprintf(stderr, "failed to process audio\n");
211
+ return self;
212
+ }
213
+ const int n_segments = whisper_full_n_segments(rw->context);
214
+ VALUE output = rb_str_new2("");
215
+ for (int i = 0; i < n_segments; ++i) {
216
+ const char * text = whisper_full_get_segment_text(rw->context, i);
217
+ output = rb_str_concat(output, rb_str_new2(text));
218
+ }
219
+ VALUE idCall = rb_intern("call");
220
+ if (blk != Qnil) {
221
+ rb_funcall(blk, idCall, 1, output);
222
+ }
223
+ return self;
224
+ }
225
+
226
+ /*
227
+ * params.language = "auto" | "en", etc...
228
+ */
229
+ static VALUE ruby_whisper_params_set_language(VALUE self, VALUE value) {
230
+ ruby_whisper_params *rwp;
231
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
232
+ if (value == Qfalse || value == Qnil) {
233
+ rwp->params.language = "auto";
234
+ } else {
235
+ rwp->params.language = StringValueCStr(value);
236
+ }
237
+ return value;
238
+ }
239
+ static VALUE ruby_whisper_params_get_language(VALUE self) {
240
+ ruby_whisper_params *rwp;
241
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
242
+ if (rwp->params.language) {
243
+ return rb_str_new2(rwp->params.language);
244
+ } else {
245
+ return rb_str_new2("auto");
246
+ }
247
+ }
248
+ static VALUE ruby_whisper_params_set_translate(VALUE self, VALUE value) {
249
+ BOOL_PARAMS_SETTER(self, translate, value)
250
+ }
251
+ static VALUE ruby_whisper_params_get_translate(VALUE self) {
252
+ BOOL_PARAMS_GETTER(self, translate)
253
+ }
254
+ static VALUE ruby_whisper_params_set_no_context(VALUE self, VALUE value) {
255
+ BOOL_PARAMS_SETTER(self, no_context, value)
256
+ }
257
+ static VALUE ruby_whisper_params_get_no_context(VALUE self) {
258
+ BOOL_PARAMS_GETTER(self, no_context)
259
+ }
260
+ static VALUE ruby_whisper_params_set_single_segment(VALUE self, VALUE value) {
261
+ BOOL_PARAMS_SETTER(self, single_segment, value)
262
+ }
263
+ static VALUE ruby_whisper_params_get_single_segment(VALUE self) {
264
+ BOOL_PARAMS_GETTER(self, single_segment)
265
+ }
266
+ static VALUE ruby_whisper_params_set_print_special(VALUE self, VALUE value) {
267
+ BOOL_PARAMS_SETTER(self, print_special, value)
268
+ }
269
+ static VALUE ruby_whisper_params_get_print_special(VALUE self) {
270
+ BOOL_PARAMS_GETTER(self, print_special)
271
+ }
272
+ static VALUE ruby_whisper_params_set_print_progress(VALUE self, VALUE value) {
273
+ BOOL_PARAMS_SETTER(self, print_progress, value)
274
+ }
275
+ static VALUE ruby_whisper_params_get_print_progress(VALUE self) {
276
+ BOOL_PARAMS_GETTER(self, print_progress)
277
+ }
278
+ static VALUE ruby_whisper_params_set_print_realtime(VALUE self, VALUE value) {
279
+ BOOL_PARAMS_SETTER(self, print_realtime, value)
280
+ }
281
+ static VALUE ruby_whisper_params_get_print_realtime(VALUE self) {
282
+ BOOL_PARAMS_GETTER(self, print_realtime)
283
+ }
284
+ static VALUE ruby_whisper_params_set_print_timestamps(VALUE self, VALUE value) {
285
+ BOOL_PARAMS_SETTER(self, print_timestamps, value)
286
+ }
287
+ static VALUE ruby_whisper_params_get_print_timestamps(VALUE self) {
288
+ BOOL_PARAMS_GETTER(self, print_timestamps)
289
+ }
290
+ static VALUE ruby_whisper_params_set_suppress_blank(VALUE self, VALUE value) {
291
+ BOOL_PARAMS_SETTER(self, suppress_blank, value)
292
+ }
293
+ static VALUE ruby_whisper_params_get_suppress_blank(VALUE self) {
294
+ BOOL_PARAMS_GETTER(self, suppress_blank)
295
+ }
296
+ static VALUE ruby_whisper_params_set_suppress_non_speech_tokens(VALUE self, VALUE value) {
297
+ BOOL_PARAMS_SETTER(self, suppress_non_speech_tokens, value)
298
+ }
299
+ static VALUE ruby_whisper_params_get_suppress_non_speech_tokens(VALUE self) {
300
+ BOOL_PARAMS_GETTER(self, suppress_non_speech_tokens)
301
+ }
302
+ static VALUE ruby_whisper_params_get_token_timestamps(VALUE self) {
303
+ BOOL_PARAMS_GETTER(self, token_timestamps)
304
+ }
305
+ static VALUE ruby_whisper_params_set_token_timestamps(VALUE self, VALUE value) {
306
+ BOOL_PARAMS_SETTER(self, token_timestamps, value)
307
+ }
308
+ static VALUE ruby_whisper_params_get_split_on_word(VALUE self) {
309
+ BOOL_PARAMS_GETTER(self, split_on_word)
310
+ }
311
+ static VALUE ruby_whisper_params_set_split_on_word(VALUE self, VALUE value) {
312
+ BOOL_PARAMS_SETTER(self, split_on_word, value)
313
+ }
314
+ static VALUE ruby_whisper_params_get_speed_up(VALUE self) {
315
+ BOOL_PARAMS_GETTER(self, speed_up)
316
+ }
317
+ static VALUE ruby_whisper_params_set_speed_up(VALUE self, VALUE value) {
318
+ BOOL_PARAMS_SETTER(self, speed_up, value)
319
+ }
320
+ static VALUE ruby_whisper_params_get_diarize(VALUE self) {
321
+ ruby_whisper_params *rwp;
322
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
323
+ if (rwp->diarize) {
324
+ return Qtrue;
325
+ } else {
326
+ return Qfalse;
327
+ }
328
+ }
329
+ static VALUE ruby_whisper_params_set_diarize(VALUE self, VALUE value) {
330
+ ruby_whisper_params *rwp;
331
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
332
+ if (value == Qfalse || value == Qnil) {
333
+ rwp->diarize = false;
334
+ } else {
335
+ rwp->diarize = true;
336
+ } \
337
+ return value;
338
+ }
339
+
340
+ static VALUE ruby_whisper_params_get_offset(VALUE self) {
341
+ ruby_whisper_params *rwp;
342
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
343
+ return INT2NUM(rwp->params.offset_ms);
344
+ }
345
+ static VALUE ruby_whisper_params_set_offset(VALUE self, VALUE value) {
346
+ ruby_whisper_params *rwp;
347
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
348
+ rwp->params.offset_ms = NUM2INT(value);
349
+ return value;
350
+ }
351
+ static VALUE ruby_whisper_params_get_duration(VALUE self) {
352
+ ruby_whisper_params *rwp;
353
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
354
+ return INT2NUM(rwp->params.duration_ms);
355
+ }
356
+ static VALUE ruby_whisper_params_set_duration(VALUE self, VALUE value) {
357
+ ruby_whisper_params *rwp;
358
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
359
+ rwp->params.duration_ms = NUM2INT(value);
360
+ return value;
361
+ }
362
+
363
+ static VALUE ruby_whisper_params_get_max_text_tokens(VALUE self) {
364
+ ruby_whisper_params *rwp;
365
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
366
+ return INT2NUM(rwp->params.n_max_text_ctx);
367
+ }
368
+ static VALUE ruby_whisper_params_set_max_text_tokens(VALUE self, VALUE value) {
369
+ ruby_whisper_params *rwp;
370
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
371
+ rwp->params.n_max_text_ctx = NUM2INT(value);
372
+ return value;
373
+ }
374
+
375
+ void Init_whisper() {
376
+ mWhisper = rb_define_module("Whisper");
377
+ cContext = rb_define_class_under(mWhisper, "Context", rb_cObject);
378
+ cParams = rb_define_class_under(mWhisper, "Params", rb_cObject);
379
+
380
+ rb_define_alloc_func(cContext, ruby_whisper_allocate);
381
+ rb_define_method(cContext, "initialize", ruby_whisper_initialize, -1);
382
+
383
+ rb_define_method(cContext, "transcribe", ruby_whisper_transcribe, -1);
384
+
385
+ rb_define_alloc_func(cParams, ruby_whisper_params_allocate);
386
+
387
+ rb_define_method(cParams, "language=", ruby_whisper_params_set_language, 1);
388
+ rb_define_method(cParams, "language", ruby_whisper_params_get_language, 0);
389
+ rb_define_method(cParams, "translate=", ruby_whisper_params_set_translate, 1);
390
+ rb_define_method(cParams, "translate", ruby_whisper_params_get_translate, 0);
391
+ rb_define_method(cParams, "no_context=", ruby_whisper_params_set_no_context, 1);
392
+ rb_define_method(cParams, "no_context", ruby_whisper_params_get_no_context, 0);
393
+ rb_define_method(cParams, "single_segment=", ruby_whisper_params_set_single_segment, 1);
394
+ rb_define_method(cParams, "single_segment", ruby_whisper_params_get_single_segment, 0);
395
+ rb_define_method(cParams, "print_special", ruby_whisper_params_get_print_special, 0);
396
+ rb_define_method(cParams, "print_special=", ruby_whisper_params_set_print_special, 1);
397
+ rb_define_method(cParams, "print_progress", ruby_whisper_params_get_print_progress, 0);
398
+ rb_define_method(cParams, "print_progress=", ruby_whisper_params_set_print_progress, 1);
399
+ rb_define_method(cParams, "print_realtime", ruby_whisper_params_get_print_realtime, 0);
400
+ rb_define_method(cParams, "print_realtime=", ruby_whisper_params_set_print_realtime, 1);
401
+ rb_define_method(cParams, "print_timestamps", ruby_whisper_params_get_print_timestamps, 0);
402
+ rb_define_method(cParams, "print_timestamps=", ruby_whisper_params_set_print_timestamps, 1);
403
+ rb_define_method(cParams, "suppress_blank", ruby_whisper_params_get_suppress_blank, 0);
404
+ rb_define_method(cParams, "suppress_blank=", ruby_whisper_params_set_suppress_blank, 1);
405
+ rb_define_method(cParams, "suppress_non_speech_tokens", ruby_whisper_params_get_suppress_non_speech_tokens, 0);
406
+ rb_define_method(cParams, "suppress_non_speech_tokens=", ruby_whisper_params_set_suppress_non_speech_tokens, 1);
407
+ rb_define_method(cParams, "token_timestamps", ruby_whisper_params_get_token_timestamps, 0);
408
+ rb_define_method(cParams, "token_timestamps=", ruby_whisper_params_set_token_timestamps, 1);
409
+ rb_define_method(cParams, "split_on_word", ruby_whisper_params_get_split_on_word, 0);
410
+ rb_define_method(cParams, "split_on_word=", ruby_whisper_params_set_split_on_word, 1);
411
+ rb_define_method(cParams, "speed_up", ruby_whisper_params_get_speed_up, 0);
412
+ rb_define_method(cParams, "speed_up=", ruby_whisper_params_set_speed_up, 1);
413
+ rb_define_method(cParams, "diarize", ruby_whisper_params_get_diarize, 0);
414
+ rb_define_method(cParams, "diarize=", ruby_whisper_params_set_diarize, 1);
415
+
416
+ rb_define_method(cParams, "offset", ruby_whisper_params_get_offset, 0);
417
+ rb_define_method(cParams, "offset=", ruby_whisper_params_set_offset, 1);
418
+ rb_define_method(cParams, "duration", ruby_whisper_params_get_duration, 0);
419
+ rb_define_method(cParams, "duration=", ruby_whisper_params_set_duration, 1);
420
+
421
+ rb_define_method(cParams, "max_text_tokens", ruby_whisper_params_get_max_text_tokens, 0);
422
+ rb_define_method(cParams, "max_text_tokens=", ruby_whisper_params_set_max_text_tokens, 1);
423
+ }
424
+ #ifdef __cplusplus
425
+ }
426
+ #endif
bindings/ruby/ext/ruby_whisper.h ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef __RUBY_WHISPER_H
2
+ #define __RUBY_WHISPER_H
3
+
4
+ #include "whisper.h"
5
+
6
+ typedef struct {
7
+ struct whisper_context *context;
8
+ } ruby_whisper;
9
+
10
+ typedef struct {
11
+ struct whisper_full_params params;
12
+ bool diarize;
13
+ } ruby_whisper_params;
14
+
15
+ #endif
bindings/ruby/tests/test_whisper.rb ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Locate the compiled extension (bindings/ruby/ext) relative to this file and
# put it on the load path so that `require 'whisper'` finds it.
TOPDIR = File.expand_path(File.join(File.dirname(__FILE__), '..'))
EXTDIR = File.join(TOPDIR, 'ext')
#$LIBDIR = File.join(TOPDIR, 'lib')
#$:.unshift(LIBDIR)
$LOAD_PATH.unshift(EXTDIR)

require 'whisper'
require 'test/unit'
9
+
10
# Tests for the Whisper Ruby bindings: round-trips every Whisper::Params
# attribute and runs one end-to-end transcription of the JFK sample.
class TestWhisper < Test::Unit::TestCase
  def setup
    @params = Whisper::Params.new
  end

  def test_language
    @params.language = "en"
    assert_equal "en", @params.language
    @params.language = "auto"
    assert_equal "auto", @params.language
  end

  def test_offset
    @params.offset = 10_000
    assert_equal 10_000, @params.offset
    @params.offset = 0
    assert_equal 0, @params.offset
  end

  def test_duration
    @params.duration = 60_000
    assert_equal 60_000, @params.duration
    @params.duration = 0
    assert_equal 0, @params.duration
  end

  def test_max_text_tokens
    @params.max_text_tokens = 300
    assert_equal 300, @params.max_text_tokens
    @params.max_text_tokens = 0
    assert_equal 0, @params.max_text_tokens
  end

  def test_translate
    @params.translate = true
    assert @params.translate
    @params.translate = false
    assert !@params.translate
  end

  def test_no_context
    @params.no_context = true
    assert @params.no_context
    @params.no_context = false
    assert !@params.no_context
  end

  def test_single_segment
    @params.single_segment = true
    assert @params.single_segment
    @params.single_segment = false
    assert !@params.single_segment
  end

  def test_print_special
    @params.print_special = true
    assert @params.print_special
    @params.print_special = false
    assert !@params.print_special
  end

  def test_print_progress
    @params.print_progress = true
    assert @params.print_progress
    @params.print_progress = false
    assert !@params.print_progress
  end

  def test_print_realtime
    @params.print_realtime = true
    assert @params.print_realtime
    @params.print_realtime = false
    assert !@params.print_realtime
  end

  def test_print_timestamps
    @params.print_timestamps = true
    assert @params.print_timestamps
    @params.print_timestamps = false
    assert !@params.print_timestamps
  end

  def test_suppress_blank
    @params.suppress_blank = true
    assert @params.suppress_blank
    @params.suppress_blank = false
    assert !@params.suppress_blank
  end

  def test_suppress_non_speech_tokens
    @params.suppress_non_speech_tokens = true
    assert @params.suppress_non_speech_tokens
    @params.suppress_non_speech_tokens = false
    assert !@params.suppress_non_speech_tokens
  end

  def test_token_timestamps
    @params.token_timestamps = true
    assert @params.token_timestamps
    @params.token_timestamps = false
    assert !@params.token_timestamps
  end

  def test_split_on_word
    @params.split_on_word = true
    assert @params.split_on_word
    @params.split_on_word = false
    assert !@params.split_on_word
  end

  def test_speed_up
    @params.speed_up = true
    assert @params.speed_up
    @params.speed_up = false
    assert !@params.speed_up
  end

  def test_whisper
    @whisper = Whisper::Context.new(File.join(TOPDIR, '..', '..', 'models', 'ggml-base.en.bin'))
    params = Whisper::Params.new
    params.print_timestamps = false

    jfk = File.join(TOPDIR, '..', '..', 'samples', 'jfk.wav')

    # Capture the text and assert afterwards: transcribe skips the block when
    # it fails, so an assertion inside the block would pass vacuously.
    transcript = nil
    @whisper.transcribe(jfk, params) { |text| transcript = text }
    assert_match(/ask not what your country can do for you, ask what you can do for your country/, transcript)
  end

end