coreml : fix ANE optimized encoder (#1716)
coreml/whisper-encoder.mm
CHANGED

@@ -24,9 +24,9 @@ struct whisper_coreml_context * whisper_coreml_init(const char * path_model) {
 
     // select which device to run the Core ML model on
     MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
-    config.computeUnits = MLComputeUnitsCPUAndGPU;
+    // config.computeUnits = MLComputeUnitsCPUAndGPU;
     //config.computeUnits = MLComputeUnitsCPUAndNeuralEngine;
-
+    config.computeUnits = MLComputeUnitsAll;
 
     const void * data = CFBridgingRetain([[whisper_encoder_impl alloc] initWithContentsOfURL:url_model configuration:config error:nil]);
 
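With the encoder exported in an ANE-friendly layout, the compute-unit restriction is dropped and Core ML is left to schedule the model across CPU, GPU, and the Neural Engine (MLComputeUnitsAll). The same behaviour can be approximated from Python when inspecting a converted encoder with coremltools; the sketch below is an illustration only, and the model path is an assumed example, not something taken from this commit.

import coremltools as ct

# Assumed path: whatever generate-coreml-model.sh produced for your model size.
model = ct.models.MLModel(
    "models/coreml-encoder-base.en.mlpackage",
    compute_units=ct.ComputeUnit.ALL,  # mirrors MLComputeUnitsAll in whisper-encoder.mm
)

# The spec lists the input/output names and shapes the encoder expects.
print(model.get_spec().description)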
models/convert-whisper-to-coreml.py
CHANGED

@@ -143,20 +143,7 @@ class AudioEncoderANE(AudioEncoder):
             x = block(x)
 
         x = self.ln_post(x)
-
-        # """
-        # TODO:
-        # I think we need to transpose the result here to make it fit whisper.cpp memory order.
-        # However, even doing this, the results are still wrong. Kind of less wrong compared to
-        # not transposing, but still wrong.
-
-        # Also, I don't know why the original OpenAI implementation does not need to transpose
-
-        # transpose to (batch_size, n_ctx, n_state)
-        # x : torch.Tensor, shape = (batch_size, n_state, 1, n_ctx)
-
-        # """
-        # x = x.transpose(1,3)
+        x = x.squeeze(2).transpose(1, 2)
 
         return x
 
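The removed TODO describes the problem the new line solves: the ANE-optimized encoder emits (batch_size, n_state, 1, n_ctx), while whisper.cpp reads the Core ML output as (batch_size, n_ctx, n_state). Dropping the singleton dimension and then swapping the last two axes produces that layout. A minimal shape check, with illustrative dimensions (512 states, 1500 context frames roughly match the base model):

import torch

# Toy dimensions for illustration; the real values depend on the Whisper model size.
batch_size, n_state, n_ctx = 1, 512, 1500

# ANE-optimized encoder output, per the old comment: (batch_size, n_state, 1, n_ctx)
x = torch.randn(batch_size, n_state, 1, n_ctx)

# squeeze(2) drops the singleton dim -> (batch_size, n_state, n_ctx);
# transpose(1, 2) then yields (batch_size, n_ctx, n_state), the order whisper.cpp expects.
y = x.squeeze(2).transpose(1, 2)
print(y.shape)  # torch.Size([1, 1500, 512])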
models/generate-coreml-model.sh
CHANGED

@@ -23,7 +23,7 @@ if [[ $mname == "-h5" ]]; then
     echo $mpath
     python3 models/convert-h5-to-coreml.py --model-name $mname --model-path $mpath --encoder-only True
 else
-    python3 models/convert-whisper-to-coreml.py --model $mname --encoder-only True
+    python3 models/convert-whisper-to-coreml.py --model $mname --encoder-only True --optimize-ane True
 fi
 
 xcrun coremlc compile models/coreml-encoder-${mname}.mlpackage models/
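Since --optimize-ane True changes the exported tensor layout, encoders converted before this commit should be regenerated with models/generate-coreml-model.sh. A rough way to sanity-check a regenerated encoder from Python is to run a dummy mel spectrogram through it and confirm the output shape is (1, n_ctx, n_state); the artifact path, feature names, and input shape below are assumptions for illustration, so verify them against the printed spec rather than treating them as part of this commit.

import numpy as np
import coremltools as ct

# Assumed artifact name produced by generate-coreml-model.sh for the "base.en" model.
model = ct.models.MLModel("models/coreml-encoder-base.en.mlpackage")

spec = model.get_spec().description
in_name = spec.input[0].name    # read the actual name from the spec
out_name = spec.output[0].name  # likewise

# 80 mel bins x 3000 frames is the usual 30-second Whisper input; adjust to match the spec.
mel = np.zeros((1, 80, 3000), dtype=np.float32)

result = model.predict({in_name: mel})
print(result[out_name].shape)  # expected (1, n_ctx, n_state), e.g. (1, 1500, 512) for base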