Spaces:
Sleeping
Sleeping
Didzis Gosko
committed on
whisper : expose CUDA device setting in public API (#1840)
Browse files
* Makefile : allow to override CUDA_ARCH_FLAG
* whisper : allow to select GPU (CUDA) device from public API
- Makefile +2 -2
- whisper.cpp +2 -1
- whisper.h +1 -0
Makefile
CHANGED
|
@@ -215,9 +215,9 @@ endif
|
|
| 215 |
|
| 216 |
ifdef WHISPER_CUBLAS
|
| 217 |
ifeq ($(shell expr $(NVCC_VERSION) \>= 11.6), 1)
|
| 218 |
-
CUDA_ARCH_FLAG=native
|
| 219 |
else
|
| 220 |
-
CUDA_ARCH_FLAG=all
|
| 221 |
endif
|
| 222 |
|
| 223 |
CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
|
|
|
|
| 215 |
|
| 216 |
ifdef WHISPER_CUBLAS
|
| 217 |
ifeq ($(shell expr $(NVCC_VERSION) \>= 11.6), 1)
|
| 218 |
+
CUDA_ARCH_FLAG ?= native
|
| 219 |
else
|
| 220 |
+
CUDA_ARCH_FLAG ?= all
|
| 221 |
endif
|
| 222 |
|
| 223 |
CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
|
whisper.cpp
CHANGED
|
@@ -1060,7 +1060,7 @@ static ggml_backend_t whisper_backend_init(const whisper_context_params & params
|
|
| 1060 |
#ifdef GGML_USE_CUBLAS
|
| 1061 |
if (params.use_gpu && ggml_cublas_loaded()) {
|
| 1062 |
WHISPER_LOG_INFO("%s: using CUDA backend\n", __func__);
|
| 1063 |
-
backend_gpu = ggml_backend_cuda_init(0);
|
| 1064 |
if (!backend_gpu) {
|
| 1065 |
WHISPER_LOG_ERROR("%s: ggml_backend_cuda_init() failed\n", __func__);
|
| 1066 |
}
|
|
@@ -3213,6 +3213,7 @@ int whisper_ctx_init_openvino_encoder(
|
|
| 3213 |
struct whisper_context_params whisper_context_default_params() {
|
| 3214 |
struct whisper_context_params result = {
|
| 3215 |
/*.use_gpu =*/ true,
|
|
|
|
| 3216 |
};
|
| 3217 |
return result;
|
| 3218 |
}
|
|
|
|
| 1060 |
#ifdef GGML_USE_CUBLAS
|
| 1061 |
if (params.use_gpu && ggml_cublas_loaded()) {
|
| 1062 |
WHISPER_LOG_INFO("%s: using CUDA backend\n", __func__);
|
| 1063 |
+
backend_gpu = ggml_backend_cuda_init(params.gpu_device);
|
| 1064 |
if (!backend_gpu) {
|
| 1065 |
WHISPER_LOG_ERROR("%s: ggml_backend_cuda_init() failed\n", __func__);
|
| 1066 |
}
|
|
|
|
| 3213 |
struct whisper_context_params whisper_context_default_params() {
|
| 3214 |
struct whisper_context_params result = {
|
| 3215 |
/*.use_gpu =*/ true,
|
| 3216 |
+
/*.gpu_device =*/ 0,
|
| 3217 |
};
|
| 3218 |
return result;
|
| 3219 |
}
|
whisper.h
CHANGED
|
@@ -86,6 +86,7 @@ extern "C" {
|
|
| 86 |
|
| 87 |
struct whisper_context_params {
|
| 88 |
bool use_gpu;
|
|
|
|
| 89 |
};
|
| 90 |
|
| 91 |
typedef struct whisper_token_data {
|
|
|
|
| 86 |
|
| 87 |
struct whisper_context_params {
|
| 88 |
bool use_gpu;
|
| 89 |
+
int gpu_device; // CUDA device
|
| 90 |
};
|
| 91 |
|
| 92 |
typedef struct whisper_token_data {
|