Spaces:
Running
Running
File size: 5,284 Bytes
1d5c7f2 5767578 3fa9a6f 1d5c7f2 fc6989a 1d5c7f2 fc6989a 1d5c7f2 fc6989a 1d5c7f2 fc6989a 1d5c7f2 fc6989a 1d5c7f2 fc6989a 1d5c7f2 fc6989a 5767578 3859606 5767578 3859606 5767578 3859606 5767578 3859606 5767578 3859606 5767578 3859606 5767578 3859606 5767578 3859606 5767578 3859606 5767578 3859606 5767578 1d5c7f2 290abed 5767578 290abed 1d5c7f2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
import Foundation
import UIKit
import whisper
/// Errors raised while setting up a Whisper model.
enum WhisperError: Error {
    /// The model could not be loaded, so no context is available.
    case couldNotInitializeContext
}
/// Wraps a single whisper.cpp context.
///
/// Meets the Whisper C++ constraint: don't access a context from more than one
/// thread at a time. Declaring the wrapper as an actor serializes every
/// instance method that touches `context`.
actor WhisperContext {
    /// Opaque handle to the underlying whisper context; released in `deinit`.
    private var context: OpaquePointer

    /// Takes ownership of an already-initialized whisper context.
    /// - Parameter context: Handle that this actor frees with `whisper_free` on deinit.
    init(context: OpaquePointer) {
        self.context = context
    }

    deinit {
        whisper_free(context)
    }

    /// Runs the full transcription pipeline over `samples` using greedy sampling
    /// and English as the language.
    ///
    /// Failures are only reported on standard output; use `getTranscription()`
    /// afterwards to read the resulting text.
    /// - Parameter samples: Audio samples handed to `whisper_full` as a contiguous buffer.
    func fullTranscribe(samples: [Float]) {
        // Leave 2 processors free (i.e. the high-efficiency cores).
        let maxThreads = max(1, min(8, cpuCount() - 2))
        print("Selecting \(maxThreads) threads")
        var params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY)
        // The C string for the language is only valid inside this closure, so the
        // whole whisper_full call has to happen within it.
        "en".withCString { en in
            // Adapted from whisper.objc
            params.print_realtime = true
            params.print_progress = false
            params.print_timestamps = true
            params.print_special = false
            params.translate = false
            params.language = en
            params.n_threads = Int32(maxThreads)
            params.offset_ms = 0
            params.no_context = true
            params.single_segment = false

            whisper_reset_timings(context)
            print("About to run whisper_full")
            samples.withUnsafeBufferPointer { buffer in
                if whisper_full(context, params, buffer.baseAddress, Int32(buffer.count)) != 0 {
                    print("Failed to run the model")
                } else {
                    whisper_print_timings(context)
                }
            }
        }
    }

    /// Concatenates the text of every segment currently held by the context.
    /// - Returns: The joined segment texts, or an empty string when there are no segments.
    func getTranscription() -> String {
        var transcription = ""
        for i in 0..<whisper_full_n_segments(context) {
            transcription += String(cString: whisper_full_get_segment_text(context, i))
        }
        return transcription
    }

    /// Runs the library's memcpy benchmark and returns its textual report.
    /// - Parameter nThreads: Thread count forwarded to `whisper_bench_memcpy_str`.
    static func benchMemcpy(nThreads: Int32) async -> String {
        return String(cString: whisper_bench_memcpy_str(nThreads))
    }

    /// Runs the library's ggml matrix-multiplication benchmark and returns its textual report.
    /// - Parameter nThreads: Thread count forwarded to `whisper_bench_ggml_mul_mat_str`.
    static func benchGgmlMulMat(nThreads: Int32) async -> String {
        return String(cString: whisper_bench_ggml_mul_mat_str(nThreads))
    }

    /// Returns a space-separated list of detected CPU features.
    ///
    /// No feature probes are currently enabled, so the result is always empty.
    private func systemInfo() -> String {
        // if ggml_cpu_has_neon() != 0 { features.append("NEON") }
        let features: [String] = []
        return features.joined(separator: " ")
    }

    /// Benchmarks encoder and decoder passes and formats the timings as one
    /// pipe-delimited table row.
    ///
    /// A warm-up pass (encode, prompt decode, single-token decode) runs first and
    /// is excluded from the measurements by resetting the timings afterwards.
    /// - Parameters:
    ///   - modelName: Label written into the result row.
    ///   - nThreads: Thread count forwarded to every encode/decode call.
    /// - Returns: The formatted row, or a string starting with `"error:"` when any
    ///   step fails.
    func benchFull(modelName: String, nThreads: Int32) async -> String {
        let nMels = whisper_model_n_mels(context)
        if whisper_set_mel(context, nil, 0, nMels) != 0 {
            return "error: failed to set mel"
        }

        // heat encoder
        if whisper_encode(context, 0, nThreads) != 0 {
            return "error: failed to encode"
        }

        var tokens = [whisper_token](repeating: 0, count: 512)

        // prompt heat
        if whisper_decode(context, &tokens, 256, 0, nThreads) != 0 {
            return "error: failed to decode"
        }

        // text-generation heat
        if whisper_decode(context, &tokens, 1, 256, nThreads) != 0 {
            return "error: failed to decode"
        }

        // Discard everything measured during warm-up.
        whisper_reset_timings(context)

        // actual run
        if whisper_encode(context, 0, nThreads) != 0 {
            return "error: failed to encode"
        }

        // text-generation
        for i in 0..<256 {
            if whisper_decode(context, &tokens, 1, Int32(i), nThreads) != 0 {
                return "error: failed to decode"
            }
        }

        // batched decoding
        for _ in 0..<64 {
            if whisper_decode(context, &tokens, 5, 0, nThreads) != 0 {
                return "error: failed to decode"
            }
        }

        // prompt processing
        for _ in 0..<16 {
            if whisper_decode(context, &tokens, 256, 0, nThreads) != 0 {
                return "error: failed to decode"
            }
        }

        whisper_print_timings(context)

        let deviceModel = await UIDevice.current.model
        let systemName = await UIDevice.current.systemName
        let features = systemInfo()

        // The C function returns a plain pointer that may be NULL; check it
        // instead of dereferencing blindly, which would trap.
        // NOTE(review): the pointed-to struct looks library-allocated and is never
        // released here — confirm ownership against the whisper.cpp API.
        guard let timingsPointer = whisper_get_timings(context) else {
            return "error: failed to get timings"
        }
        let timings: whisper_timings = timingsPointer.pointee

        let encodeMs = String(format: "%.2f", timings.encode_ms)
        let decodeMs = String(format: "%.2f", timings.decode_ms)
        let batchdMs = String(format: "%.2f", timings.batchd_ms)
        let promptMs = String(format: "%.2f", timings.prompt_ms)
        return "| \(deviceModel) | \(systemName) | \(features) | \(modelName) | \(nThreads) | 1 | \(encodeMs) | \(decodeMs) | \(batchdMs) | \(promptMs) | <todo> |"
    }

    /// Loads a model file and wraps the resulting context in a `WhisperContext`.
    ///
    /// On the simulator the GPU is disabled; elsewhere flash attention is enabled.
    /// - Parameter path: File-system path of the model to load.
    /// - Returns: An actor owning the freshly created context.
    /// - Throws: `WhisperError.couldNotInitializeContext` when the model cannot be loaded.
    static func createContext(path: String) throws -> WhisperContext {
        var params = whisper_context_default_params()
#if targetEnvironment(simulator)
        params.use_gpu = false
        print("Running on the simulator, using CPU")
#else
        params.flash_attn = true // Enabled by default for Metal
#endif
        guard let context = whisper_init_from_file_with_params(path, params) else {
            print("Couldn't load model at \(path)")
            throw WhisperError.couldNotInitializeContext
        }
        return WhisperContext(context: context)
    }
}
/// Number of processors reported by `ProcessInfo` for the current process.
fileprivate func cpuCount() -> Int {
    let processInfo = ProcessInfo.processInfo
    return processInfo.processorCount
}
|