whisper : make large version explicit + fix data size units (#1493)
- Makefile +2 -2
- README.md +8 -8
- bindings/go/examples/go-model-download/main.go +1 -1
- examples/livestream.sh +1 -1
- examples/twitch.sh +1 -1
- extra/convert-all.sh +1 -1
- ggml-metal.m +9 -9
- models/README.md +13 -13
- models/convert-h5-to-coreml.py +2 -2
- models/convert-whisper-to-coreml.py +2 -2
- models/convert-whisper-to-openvino.py +2 -2
- models/download-coreml-model.sh +1 -1
- models/download-ggml-model.cmd +1 -1
- models/download-ggml-model.sh +2 -2
- tests/run-tests.sh +1 -1
- whisper.cpp +22 -22
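
In short: the generic "large" model name is replaced by explicit large-v1 / large-v2 / large-v3 targets across the Makefile, download scripts, converters and bindings, and the byte counts that the code prints under an MB label are now divided by 1e6 so the label matches the value.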
Makefile
CHANGED
@@ -418,9 +418,9 @@ samples:
 .PHONY: medium
 .PHONY: large-v1
 .PHONY: large-v2
-.PHONY: large
+.PHONY: large-v3
 
-tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large: main
+tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3: main
 	bash ./models/download-ggml-model.sh $@
 	@echo ""
 	@echo "==============================================="
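
With the new target in place, running "make large-v3" builds main and then invokes models/download-ggml-model.sh with the target name, so the explicit version is what gets fetched; the bare "large" target is gone.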
README.md
CHANGED
@@ -231,18 +231,18 @@ make medium.en
 make medium
 make large-v1
 make large-v2
-make large
+make large-v3
 ```
 
 ## Memory usage
 
-| Model | Disk
-| --- | ---
-| tiny | 75
-| base | 142
-| small | 466
-| medium | 1.5
-| large | 2.9
+| Model | Disk | Mem |
+| --- | --- | --- |
+| tiny | 75 MiB | ~273 MB |
+| base | 142 MiB | ~388 MB |
+| small | 466 MiB | ~852 MB |
+| medium | 1.5 GiB | ~2.1 GB |
+| large | 2.9 GiB | ~3.9 GB |
 
 ## Quantization
 
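
Note the mixed units in the new table: 1 MiB = 2^20 bytes while 1 MB = 10^6 bytes, so for example 142 MiB ≈ 149 MB and 2.9 GiB ≈ 3.1 GB.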
bindings/go/examples/go-model-download/main.go
CHANGED
@@ -24,7 +24,7 @@ const (
 
 var (
 	// The models which will be downloaded, if no model is specified as an argument
-	modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large"}
+	modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3"}
 )
 
 var (
examples/livestream.sh
CHANGED
@@ -48,7 +48,7 @@ if [ -n "$3" ]; then
 fi
 
 # Whisper models
-models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" )
+models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
 
 # list available models
 function list_models {
examples/twitch.sh
CHANGED
@@ -21,7 +21,7 @@ help()
 echo "Usage: ./twitch.sh -s [step] -m [model] -t [threads] [url]"
 echo "options:"
 echo "-s Step in seconds (default is $step)."
-echo "-m Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large-v2' 'large' (default is '$model')."
+echo "-m Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large-v2' 'large-v3' (default is '$model')."
 echo "-t Number of threads to use."
 echo "-h Print this help page."
 echo
extra/convert-all.sh
CHANGED
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" )
+models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
 
 for model in "${models[@]}"; do
 python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/
ggml-metal.m
CHANGED
@@ -346,9 +346,9 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
 }
 
 GGML_METAL_LOG_INFO("%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
-GGML_METAL_LOG_INFO("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize /
+GGML_METAL_LOG_INFO("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1e6);
 if (ctx->device.maxTransferRate != 0) {
-GGML_METAL_LOG_INFO("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate /
+GGML_METAL_LOG_INFO("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1e6);
 } else {
 GGML_METAL_LOG_INFO("%s: maxTransferRate = built-in GPU\n", __func__);
 }
@@ -541,11 +541,11 @@ bool ggml_metal_add_buffer(
 ctx->buffers[ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy:data length:size_aligned options:MTLResourceStorageModeShared deallocator:nil];
 
 if (ctx->buffers[ctx->n_buffers].metal == nil) {
-GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_aligned /
+GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_aligned / 1e6);
 return false;
 }
 
-GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned /
+GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1e6);
 
 ++ctx->n_buffers;
 } else {
@@ -565,11 +565,11 @@ bool ggml_metal_add_buffer(
 ctx->buffers[ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy:(void *) ((uint8_t *) data + i) length:size_step_aligned options:MTLResourceStorageModeShared deallocator:nil];
 
 if (ctx->buffers[ctx->n_buffers].metal == nil) {
-GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_step_aligned /
+GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_step_aligned / 1e6);
 return false;
 }
 
-GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned /
+GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1e6, i);
 if (i + size_step < size) {
 GGML_METAL_LOG_INFO("\n");
 }
@@ -580,8 +580,8 @@ bool ggml_metal_add_buffer(
 
 #if TARGET_OS_OSX
 GGML_METAL_LOG_INFO(", (%8.2f / %8.2f)",
-ctx->device.currentAllocatedSize /
-ctx->device.recommendedMaxWorkingSetSize /
+ctx->device.currentAllocatedSize / 1e6,
+ctx->device.recommendedMaxWorkingSetSize / 1e6);
 
 if (ctx->device.currentAllocatedSize > ctx->device.recommendedMaxWorkingSetSize) {
 GGML_METAL_LOG_WARN("%s: warning: current allocated size is greater than the recommended max working set size\n", __func__);
@@ -589,7 +589,7 @@ bool ggml_metal_add_buffer(
 GGML_METAL_LOG_INFO("\n");
 }
 #else
-GGML_METAL_LOG_INFO(", (%8.2f)\n", ctx->device.currentAllocatedSize /
+GGML_METAL_LOG_INFO(", (%8.2f)\n", ctx->device.currentAllocatedSize / 1e6);
 #endif
 }
 
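
The unit fix above comes down to which divisor backs the "MB" label in these logs. A minimal standalone sketch of the two conventions follows; it is only an illustration, not code from the patch, and the byte count is a made-up example:

    #include <cstdio>

    int main() {
        const double bytes = 148897792.0; // example: roughly a 142 MiB model file

        // decimal megabytes, as used by the new `/ 1e6` log statements
        const double mb  = bytes / 1e6;
        // binary mebibytes (MiB), the 1024-based convention
        const double mib = bytes / (1024.0 * 1024.0);

        std::printf("%.2f MB (decimal) vs %.2f MiB (binary)\n", mb, mib); // ~148.90 vs 142.00
        return 0;
    }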
models/README.md
CHANGED
@@ -39,19 +39,19 @@ https://huggingface.co/ggerganov/whisper.cpp/tree/main
 
 ## Available models
 
-| Model | Disk
-| --- | ---
-| tiny | 75
-| tiny.en | 75
-| base | 142
-| base.en | 142
-| small | 466
-| small.en | 466
-| medium | 1.5
-| medium.en | 1.5
-| large-v1 | 2.9
-| large-v2 | 2.9
-| large
+| Model | Disk | SHA |
+| --- | --- | --- |
+| tiny | 75 MiB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
+| tiny.en | 75 MiB | `c78c86eb1a8faa21b369bcd33207cc90d64ae9df` |
+| base | 142 MiB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
+| base.en | 142 MiB | `137c40403d78fd54d454da0f9bd998f78703390c` |
+| small | 466 MiB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
+| small.en | 466 MiB | `db8a495a91d927739e50b3fc1cc4c6b8f6c2d022` |
+| medium | 1.5 GiB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
+| medium.en | 1.5 GiB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` |
+| large-v1 | 2.9 GiB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
+| large-v2 | 2.9 GiB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
+| large-v3 | 2.9 GiB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` |
 
 ## Model files for testing purposes
 
models/convert-h5-to-coreml.py
CHANGED
@@ -78,14 +78,14 @@ def convert_hf_whisper(hf_model_name_or_path: str, whisper_state_path: str):
 # Ported from models/convert-whisper-to-coreml.py
 if __name__ == "__main__":
 parser = argparse.ArgumentParser()
-parser.add_argument("--model-name", type=str, help="name of model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-
+parser.add_argument("--model-name", type=str, help="name of model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3)", required=True)
 parser.add_argument("--model-path", type=str, help="path to the model (e.g. if published on HuggingFace: Oblivion208/whisper-tiny-cantonese)", required=True)
 parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False)
 parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False)
 parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
 args = parser.parse_args()
 
-if args.model_name not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-
+if args.model_name not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3"]:
 raise ValueError("Invalid model name")
 
 pt_target_path = f"models/hf-{args.model_name}.pt"
models/convert-whisper-to-coreml.py
CHANGED
@@ -296,13 +296,13 @@ def convert_decoder(hparams, model, quantize=False):
 
 if __name__ == "__main__":
 parser = argparse.ArgumentParser()
-parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-
+parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3)", required=True)
 parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False)
 parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False)
 parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
 args = parser.parse_args()
 
-if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "small.en-tdrz", "medium", "medium.en", "large", "large-
+if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "small.en-tdrz", "medium", "medium.en", "large-v1", "large-v2", "large-v3"]:
 raise ValueError("Invalid model name")
 
 whisper = load_model(args.model).cpu()
models/convert-whisper-to-openvino.py
CHANGED
@@ -38,10 +38,10 @@ def convert_encoder(hparams, encoder, mname):
 
 if __name__ == "__main__":
 parser = argparse.ArgumentParser()
-parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-
+parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3)", required=True)
 args = parser.parse_args()
 
-if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-
+if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3"]:
 raise ValueError("Invalid model name")
 
 whisper = load_model(args.model).cpu()
models/download-coreml-model.sh
CHANGED
@@ -19,7 +19,7 @@ function get_script_path() {
 models_path="$(get_script_path)"
 
 # Whisper models
-models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" )
+models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
 
 # list available models
 function list_models {
models/download-ggml-model.cmd
CHANGED
@@ -8,7 +8,7 @@ popd
 set argc=0
 for %%x in (%*) do set /A argc+=1
 
-set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large
+set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3
 
 if %argc% neq 1 (
 echo.
models/download-ggml-model.sh
CHANGED
@@ -22,7 +22,7 @@ function get_script_path() {
 models_path="$(get_script_path)"
 
 # Whisper models
-models=(
+models=(
 "tiny.en"
 "tiny"
 "tiny-q5_1"
@@ -42,7 +42,7 @@ models=(
 "medium.en-q5_0"
 "large-v1"
 "large-v2"
-"large"
+"large-v3"
 "large-q5_0"
 )
 
tests/run-tests.sh
CHANGED
@@ -19,7 +19,7 @@
 cd `dirname $0`
 
 # Whisper models
-models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" )
+models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
 
 # list available models
 function list_models {
whisper.cpp
CHANGED
@@ -1522,7 +1522,7 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
 
 model.buffer = ggml_backend_alloc_buffer(wctx.backend, size_main);
 
-WHISPER_LOG_INFO("%s: %8s buffer size = %8.2f MB\n", __func__, ggml_backend_name(wctx.backend), size_main /
+WHISPER_LOG_INFO("%s: %8s buffer size = %8.2f MB\n", __func__, ggml_backend_name(wctx.backend), size_main / 1e6);
 }
 
 ggml_allocr * alloc = ggml_allocr_new_from_buffer(model.buffer);
@@ -1637,12 +1637,12 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
 ggml_backend_tensor_set(tensor, read_buf.data(), 0, ggml_nbytes(tensor));
 }
 
-//printf("%48s - [%5d, %5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ne[2], ggml_type_name((ggml_type) ttype), ggml_nbytes(tensor)/
+//printf("%48s - [%5d, %5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ne[2], ggml_type_name((ggml_type) ttype), ggml_nbytes(tensor)/1e6);
 total_size += ggml_nbytes(tensor);
 model.n_loaded++;
 }
 
-WHISPER_LOG_INFO("%s: model size = %7.2f MB\n", __func__, total_size/
+WHISPER_LOG_INFO("%s: model size = %7.2f MB\n", __func__, total_size/1e6);
 
 if (model.n_loaded == 0) {
 WHISPER_LOG_WARN("%s: WARN no tensors loaded from model file - assuming empty model for testing\n", __func__);
@@ -2027,11 +2027,11 @@ static struct ggml_cgraph * whisper_build_graph_encoder(
 ////////////////////////////////////////////////////////////////////////////
 
 //printf("%s: used_mem = %f MB, %f MB, %f MB %f MB %f MB\n", __func__,
-// ggml_used_mem(ctx0)/
-// wstate.get_buf_max_mem(0)/
-// wstate.get_buf_max_mem(1)/
-// wstate.get_buf_max_mem(2)/
-// wstate.get_buf_max_mem(3)/
+// ggml_used_mem(ctx0)/1e6,
+// wstate.get_buf_max_mem(0)/1e6,
+// wstate.get_buf_max_mem(1)/1e6,
+// wstate.get_buf_max_mem(2)/1e6,
+// wstate.get_buf_max_mem(3)/1e6);
 
 ggml_free(ctx0);
 
@@ -2613,11 +2613,11 @@ static bool whisper_decode_internal(
 
 if (batch.n_tokens > 1) {
 //printf("%s: used_mem = %f MB, %f MB, %f MB %f MB %f MB\n", __func__,
-// ggml_used_mem(ctx0)/
-// wstate.get_buf_max_mem(0)/
-// wstate.get_buf_max_mem(1)/
-// wstate.get_buf_max_mem(2)/
-// wstate.get_buf_max_mem(3)/
+// ggml_used_mem(ctx0)/1e6,
+// wstate.get_buf_max_mem(0)/1e6,
+// wstate.get_buf_max_mem(1)/1e6,
+// wstate.get_buf_max_mem(2)/1e6,
+// wstate.get_buf_max_mem(3)/1e6);
 }
 
 if (batch.n_tokens == 1) {
@@ -3057,7 +3057,7 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
 
 {
 const size_t memory_size = ggml_nbytes(state->kv_self.k) + ggml_nbytes(state->kv_self.v);
-WHISPER_LOG_INFO("%s: kv self size = %7.2f MB\n", __func__, memory_size /
+WHISPER_LOG_INFO("%s: kv self size = %7.2f MB\n", __func__, memory_size / 1e6);
 }
 
 if (!kv_cache_init(ctx->model.hparams, state->kv_cross, ctx->backend, ctx->itype, ctx->model.hparams.n_audio_ctx)) {
@@ -3068,7 +3068,7 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
 
 {
 const size_t memory_size = ggml_nbytes(state->kv_cross.k) + ggml_nbytes(state->kv_cross.v);
-WHISPER_LOG_INFO("%s: kv cross size = %7.2f MB\n", __func__, memory_size /
+WHISPER_LOG_INFO("%s: kv cross size = %7.2f MB\n", __func__, memory_size / 1e6);
 }
 
 #ifdef WHISPER_USE_COREML
@@ -3110,7 +3110,7 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
 return whisper_build_graph_conv(*ctx, *state, 0);
 });
 
-WHISPER_LOG_INFO("%s: compute buffer (conv) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_conv) /
+WHISPER_LOG_INFO("%s: compute buffer (conv) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_conv) / 1e6);
 }
 
 // encoder allocator
@@ -3120,7 +3120,7 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
 return whisper_build_graph_encoder(*ctx, *state);
 });
 
-WHISPER_LOG_INFO("%s: compute buffer (encode) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_encode) /
+WHISPER_LOG_INFO("%s: compute buffer (encode) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_encode) / 1e6);
 }
 
 // cross allocator
@@ -3130,7 +3130,7 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
 return whisper_build_graph_cross(*ctx, *state);
 });
 
-WHISPER_LOG_INFO("%s: compute buffer (cross) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_cross) /
+WHISPER_LOG_INFO("%s: compute buffer (cross) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_cross) / 1e6);
 }
 
 // decoder allocator
@@ -3148,7 +3148,7 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
 return whisper_build_graph_decoder(*ctx, *state, state->batch);
 });
 
-WHISPER_LOG_INFO("%s: compute buffer (decode) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_decode) /
+WHISPER_LOG_INFO("%s: compute buffer (decode) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_decode) / 1e6);
 }
 
 whisper_allocr_graph_realloc(state->alloc_conv, ctx->backend);
@@ -6072,8 +6072,8 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
 size_t n = 20;
 size_t arr = n_threads > 0 ? 1024llu : n_threads; // trick to avoid compiler optimizations
 
-// 1GB
-const size_t size = arr*
+// 1GB array
+const size_t size = arr*1e9;
 
 // single-thread
 {
@@ -6099,7 +6099,7 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
 src[rand() % size] = rand() % 256;
 }
 
-snprintf(strbuf, sizeof(strbuf), "memcpy: %.2f GB/s (1 thread)\n", (double) (n*size)/(tsum*
+snprintf(strbuf, sizeof(strbuf), "memcpy: %.2f GB/s (1 thread)\n", (double) (n*size)/(tsum*1e9));
 s += strbuf;
 
 // needed to prevent the compiler from optimizing the memcpy away