Spaces:
Running
Running
Adam Debono
committed on
ruby : Add ruby binding for max_len (#3365)
Browse files
* add ruby binding for max_len
* add test, update param numbers
bindings/ruby/ext/ruby_whisper_params.c
CHANGED
|
@@ -26,7 +26,7 @@
|
|
| 26 |
rb_define_method(cParams, #param_name, ruby_whisper_params_get_ ## param_name, 0); \
|
| 27 |
rb_define_method(cParams, #param_name "=", ruby_whisper_params_set_ ## param_name, 1);
|
| 28 |
|
| 29 |
-
#define RUBY_WHISPER_PARAMS_PARAM_NAMES_COUNT 35
|
| 30 |
|
| 31 |
extern VALUE cParams;
|
| 32 |
extern VALUE cVADParams;
|
|
@@ -49,6 +49,7 @@ static ID id_print_timestamps;
|
|
| 49 |
static ID id_suppress_blank;
|
| 50 |
static ID id_suppress_nst;
|
| 51 |
static ID id_token_timestamps;
|
|
|
|
| 52 |
static ID id_split_on_word;
|
| 53 |
static ID id_initial_prompt;
|
| 54 |
static ID id_diarize;
|
|
@@ -514,6 +515,33 @@ ruby_whisper_params_set_token_timestamps(VALUE self, VALUE value)
|
|
| 514 |
{
|
| 515 |
BOOL_PARAMS_SETTER(self, token_timestamps, value)
|
| 516 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 517 |
/*
|
| 518 |
* If true, split on word rather than on token (when used with max_len).
|
| 519 |
*
|
|
@@ -1137,6 +1165,7 @@ ruby_whisper_params_initialize(int argc, VALUE *argv, VALUE self)
|
|
| 1137 |
SET_PARAM_IF_SAME(suppress_blank)
|
| 1138 |
SET_PARAM_IF_SAME(suppress_nst)
|
| 1139 |
SET_PARAM_IF_SAME(token_timestamps)
|
|
|
|
| 1140 |
SET_PARAM_IF_SAME(split_on_word)
|
| 1141 |
SET_PARAM_IF_SAME(initial_prompt)
|
| 1142 |
SET_PARAM_IF_SAME(offset)
|
|
@@ -1271,30 +1300,31 @@ init_ruby_whisper_params(VALUE *mWhisper)
|
|
| 1271 |
DEFINE_PARAM(suppress_blank, 8)
|
| 1272 |
DEFINE_PARAM(suppress_nst, 9)
|
| 1273 |
DEFINE_PARAM(token_timestamps, 10)
|
| 1274 |
-
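DEFINE_PARAM(split_on_word, 11)
|
| 1275 |
-
DEFINE_PARAM(initial_prompt, 12)
|
| 1276 |
-
DEFINE_PARAM(diarize, 13)
|
| 1277 |
-
DEFINE_PARAM(offset, 14)
|
| 1278 |
-
DEFINE_PARAM(duration, 15)
|
| 1279 |
-
DEFINE_PARAM(max_text_tokens, 16)
|
| 1280 |
-
DEFINE_PARAM(temperature, 17)
|
| 1281 |
-
DEFINE_PARAM(max_initial_ts, 18)
|
| 1282 |
-
DEFINE_PARAM(length_penalty, 19)
|
| 1283 |
-
DEFINE_PARAM(temperature_inc, 20)
|
| 1284 |
-
DEFINE_PARAM(entropy_thold, 21)
|
| 1285 |
-
DEFINE_PARAM(logprob_thold, 22)
|
| 1286 |
-
DEFINE_PARAM(no_speech_thold, 23)
|
| 1287 |
-
DEFINE_PARAM(new_segment_callback, 24)
|
| 1288 |
-
DEFINE_PARAM(new_segment_callback_user_data, 25)
|
| 1289 |
-
DEFINE_PARAM(progress_callback, 26)
|
| 1290 |
-
DEFINE_PARAM(progress_callback_user_data, 27)
|
| 1291 |
-
DEFINE_PARAM(encoder_begin_callback, 28)
|
| 1292 |
-
DEFINE_PARAM(encoder_begin_callback_user_data, 29)
|
| 1293 |
-
DEFINE_PARAM(abort_callback, 30)
|
| 1294 |
-
DEFINE_PARAM(abort_callback_user_data, 31)
|
| 1295 |
-
DEFINE_PARAM(vad, 32)
|
| 1296 |
-
DEFINE_PARAM(vad_model_path, 33)
|
| 1297 |
-
DEFINE_PARAM(vad_params, 34)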
|
|
|
|
| 1298 |
|
| 1299 |
rb_define_method(cParams, "on_new_segment", ruby_whisper_params_on_new_segment, 0);
|
| 1300 |
rb_define_method(cParams, "on_progress", ruby_whisper_params_on_progress, 0);
|
|
|
|
| 26 |
rb_define_method(cParams, #param_name, ruby_whisper_params_get_ ## param_name, 0); \
|
| 27 |
rb_define_method(cParams, #param_name "=", ruby_whisper_params_set_ ## param_name, 1);
|
| 28 |
|
| 29 |
+
#define RUBY_WHISPER_PARAMS_PARAM_NAMES_COUNT 36
|
| 30 |
|
| 31 |
extern VALUE cParams;
|
| 32 |
extern VALUE cVADParams;
|
|
|
|
| 49 |
static ID id_suppress_blank;
|
| 50 |
static ID id_suppress_nst;
|
| 51 |
static ID id_token_timestamps;
|
| 52 |
+
static ID id_max_len;
|
| 53 |
static ID id_split_on_word;
|
| 54 |
static ID id_initial_prompt;
|
| 55 |
static ID id_diarize;
|
|
|
|
| 515 |
{
|
| 516 |
BOOL_PARAMS_SETTER(self, token_timestamps, value)
|
| 517 |
}
|
| 518 |
+
|
| 519 |
+
/*
|
| 520 |
+
* max segment length in characters.
|
| 521 |
+
*
|
| 522 |
+
* call-seq:
|
| 523 |
+
* max_len -> Integer
|
| 524 |
+
*/
|
| 525 |
+
static VALUE
|
| 526 |
+
ruby_whisper_params_get_max_len(VALUE self)
|
| 527 |
+
{
|
| 528 |
+
ruby_whisper_params *rwp;
|
| 529 |
+
TypedData_Get_Struct(self, ruby_whisper_params, &ruby_whisper_params_type, rwp);
|
| 530 |
+
return INT2NUM(rwp->params.max_len);
|
| 531 |
+
}
|
| 532 |
+
/*
|
| 533 |
+
* call-seq:
|
| 534 |
+
* max_len = length -> length
|
| 535 |
+
*/
|
| 536 |
+
static VALUE
|
| 537 |
+
ruby_whisper_params_set_max_len(VALUE self, VALUE value)
|
| 538 |
+
{
|
| 539 |
+
ruby_whisper_params *rwp;
|
| 540 |
+
TypedData_Get_Struct(self, ruby_whisper_params, &ruby_whisper_params_type, rwp);
|
| 541 |
+
rwp->params.max_len = NUM2INT(value);
|
| 542 |
+
return value;
|
| 543 |
+
}
|
| 544 |
+
|
| 545 |
/*
|
| 546 |
* If true, split on word rather than on token (when used with max_len).
|
| 547 |
*
|
|
|
|
| 1165 |
SET_PARAM_IF_SAME(suppress_blank)
|
| 1166 |
SET_PARAM_IF_SAME(suppress_nst)
|
| 1167 |
SET_PARAM_IF_SAME(token_timestamps)
|
| 1168 |
+
SET_PARAM_IF_SAME(max_len)
|
| 1169 |
SET_PARAM_IF_SAME(split_on_word)
|
| 1170 |
SET_PARAM_IF_SAME(initial_prompt)
|
| 1171 |
SET_PARAM_IF_SAME(offset)
|
|
|
|
| 1300 |
DEFINE_PARAM(suppress_blank, 8)
|
| 1301 |
DEFINE_PARAM(suppress_nst, 9)
|
| 1302 |
DEFINE_PARAM(token_timestamps, 10)
|
| 1303 |
+
DEFINE_PARAM(max_len, 11)
|
| 1304 |
+
DEFINE_PARAM(split_on_word, 12)
|
| 1305 |
+
DEFINE_PARAM(initial_prompt, 13)
|
| 1306 |
+
DEFINE_PARAM(diarize, 14)
|
| 1307 |
+
DEFINE_PARAM(offset, 15)
|
| 1308 |
+
DEFINE_PARAM(duration, 16)
|
| 1309 |
+
DEFINE_PARAM(max_text_tokens, 17)
|
| 1310 |
+
DEFINE_PARAM(temperature, 18)
|
| 1311 |
+
DEFINE_PARAM(max_initial_ts, 19)
|
| 1312 |
+
DEFINE_PARAM(length_penalty, 20)
|
| 1313 |
+
DEFINE_PARAM(temperature_inc, 21)
|
| 1314 |
+
DEFINE_PARAM(entropy_thold, 22)
|
| 1315 |
+
DEFINE_PARAM(logprob_thold, 23)
|
| 1316 |
+
DEFINE_PARAM(no_speech_thold, 24)
|
| 1317 |
+
DEFINE_PARAM(new_segment_callback, 25)
|
| 1318 |
+
DEFINE_PARAM(new_segment_callback_user_data, 26)
|
| 1319 |
+
DEFINE_PARAM(progress_callback, 27)
|
| 1320 |
+
DEFINE_PARAM(progress_callback_user_data, 28)
|
| 1321 |
+
DEFINE_PARAM(encoder_begin_callback, 29)
|
| 1322 |
+
DEFINE_PARAM(encoder_begin_callback_user_data, 30)
|
| 1323 |
+
DEFINE_PARAM(abort_callback, 31)
|
| 1324 |
+
DEFINE_PARAM(abort_callback_user_data, 32)
|
| 1325 |
+
DEFINE_PARAM(vad, 33)
|
| 1326 |
+
DEFINE_PARAM(vad_model_path, 34)
|
| 1327 |
+
DEFINE_PARAM(vad_params, 35)
|
| 1328 |
|
| 1329 |
rb_define_method(cParams, "on_new_segment", ruby_whisper_params_on_new_segment, 0);
|
| 1330 |
rb_define_method(cParams, "on_progress", ruby_whisper_params_on_progress, 0);
|
bindings/ruby/sig/whisper.rbs
CHANGED
|
@@ -135,6 +135,7 @@ module Whisper
|
|
| 135 |
?suppress_blank: boolish,
|
| 136 |
?suppress_nst: boolish,
|
| 137 |
?token_timestamps: boolish,
|
|
|
|
| 138 |
?split_on_word: boolish,
|
| 139 |
?initial_prompt: string | nil,
|
| 140 |
?diarize: boolish,
|
|
@@ -222,6 +223,12 @@ module Whisper
|
|
| 222 |
#
|
| 223 |
def token_timestamps: () -> (true | false)
|
| 224 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
def split_on_word=: (boolish) -> boolish
|
| 226 |
|
| 227 |
# If true, split on word rather than on token (when used with max_len).
|
|
|
|
| 135 |
?suppress_blank: boolish,
|
| 136 |
?suppress_nst: boolish,
|
| 137 |
?token_timestamps: boolish,
|
| 138 |
+
?max_len: Integer,
|
| 139 |
?split_on_word: boolish,
|
| 140 |
?initial_prompt: string | nil,
|
| 141 |
?diarize: boolish,
|
|
|
|
| 223 |
#
|
| 224 |
def token_timestamps: () -> (true | false)
|
| 225 |
|
| 226 |
+
def max_len=: (Integer) -> Integer
|
| 227 |
+
|
| 228 |
+
# max segment length in characters.
|
| 229 |
+
#
|
| 230 |
+
def max_len: () -> Integer
|
| 231 |
+
|
| 232 |
def split_on_word=: (boolish) -> boolish
|
| 233 |
|
| 234 |
# If true, split on word rather than on token (when used with max_len).
|
bindings/ruby/test/test_params.rb
CHANGED
|
@@ -13,6 +13,7 @@ class TestParams < TestBase
|
|
| 13 |
:suppress_blank,
|
| 14 |
:suppress_nst,
|
| 15 |
:token_timestamps,
|
|
|
|
| 16 |
:split_on_word,
|
| 17 |
:initial_prompt,
|
| 18 |
:diarize,
|
|
@@ -139,6 +140,13 @@ class TestParams < TestBase
|
|
| 139 |
assert !@params.token_timestamps
|
| 140 |
end
|
| 141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
def test_split_on_word
|
| 143 |
@params.split_on_word = true
|
| 144 |
assert @params.split_on_word
|
|
|
|
| 13 |
:suppress_blank,
|
| 14 |
:suppress_nst,
|
| 15 |
:token_timestamps,
|
| 16 |
+
:max_len,
|
| 17 |
:split_on_word,
|
| 18 |
:initial_prompt,
|
| 19 |
:diarize,
|
|
|
|
| 140 |
assert !@params.token_timestamps
|
| 141 |
end
|
| 142 |
|
| 143 |
+
def test_max_len
|
| 144 |
+
@params.max_len = 42
|
| 145 |
+
assert_equal @params.max_len, 42
|
| 146 |
+
@params.max_len = 0
|
| 147 |
+
assert_equal @params.max_len, 0
|
| 148 |
+
end
|
| 149 |
+
|
| 150 |
def test_split_on_word
|
| 151 |
@params.split_on_word = true
|
| 152 |
assert @params.split_on_word
|