ggerganov committed on
Commit
bf551c0
·
1 Parent(s): c0f40a4

Add OpenBLAS support

Browse files

Supported via CMake - just add:

cmake .. -DWHISPER_SUPPORT_OPENBLAS=ON

On Ubuntu, you have to install the library like this:

apt install libopenblas-dev

Unfortunately, I don't observe any benefit compared to the
original AVX2 + FP16 implementation. Maybe I'm missing something

Files changed (2) hide show
  1. CMakeLists.txt +18 -1
  2. ggml.c +43 -43
CMakeLists.txt CHANGED
@@ -41,8 +41,13 @@ option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STAND
41
 
42
  option(WHISPER_SUPPORT_SDL2 "whisper: support for libSDL2" OFF)
43
 
 
 
 
 
 
 
44
  option(WHISPER_PERF "whisper: enable perf timings" OFF)
45
- option(WHISPER_NO_ACCELERATE "whisper: disable Accelerate framework" OFF)
46
 
47
  # sanitizers
48
 
@@ -86,6 +91,18 @@ if (APPLE AND NOT WHISPER_NO_ACCELERATE)
86
  endif()
87
  endif()
88
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  # compiler flags
90
 
91
  if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
 
41
 
42
  option(WHISPER_SUPPORT_SDL2 "whisper: support for libSDL2" OFF)
43
 
44
+ if (APPLE)
45
+ option(WHISPER_NO_ACCELERATE "whisper: disable Accelerate framework" OFF)
46
+ else()
47
+ option(WHISPER_SUPPORT_OPENBLAS "whisper: support for OpenBLAS" OFF)
48
+ endif()
49
+
50
  option(WHISPER_PERF "whisper: enable perf timings" OFF)
 
51
 
52
  # sanitizers
53
 
 
91
  endif()
92
  endif()
93
 
94
+ if (WHISPER_SUPPORT_OPENBLAS)
95
+ find_library(OPENBLAS_LIB openblas)
96
+ if (OPENBLAS_LIB)
97
+ message(STATUS "OpenBLAS found")
98
+
99
+ set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${OPENBLAS_LIB})
100
+ set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_OPENBLAS)
101
+ else()
102
+ message(WARNING "OpenBLAS not found")
103
+ endif()
104
+ endif()
105
+
106
  # compiler flags
107
 
108
  if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
ggml.c CHANGED
@@ -76,6 +76,8 @@ typedef void* thread_ret_t;
76
 
77
  #ifdef GGML_USE_ACCELERATE
78
  #include <Accelerate/Accelerate.h>
 
 
79
  #endif
80
 
81
  // floating point type used to accumulate sums
@@ -4055,46 +4057,44 @@ void ggml_compute_forward_mul_mat_f32(
4055
  // nb00 < nb01 - src0 is transposed
4056
  // compute by src0 columns
4057
 
4058
- //#ifdef GGML_USE_ACCELERATE
4059
- // if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
4060
- // GGML_ASSERT(ggml_is_contiguous(src0));
4061
- // GGML_ASSERT(nb10 == sizeof(float));
4062
- //
4063
- // if (params->ith != 0) return;
4064
- //
4065
- // if (params->type == GGML_TASK_INIT) {
4066
- // return;
4067
- // }
4068
- //
4069
- // if (params->type == GGML_TASK_FINALIZE) {
4070
- // return;
4071
- // }
4072
- //
4073
- // float * const wdata = params->wdata;
4074
- //
4075
- // for (int i03 = 0; i03 < ne03; i03++) {
4076
- // for (int i02 = 0; i02 < ne02; i02++) {
4077
- // const float * x = (float *) (src0->data);
4078
- // const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13);
4079
- //
4080
- // float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
4081
- //
4082
- // // zT = y * xT
4083
- // {
4084
- // cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
4085
- // ne11, ne01, ne10,
4086
- // 1.0f, y, ne10,
4087
- // x, ne10,
4088
- // 0.0f, d, ne01);
4089
- // }
4090
- // }
4091
- // }
4092
- //
4093
- // //printf("CBLAS F32 = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
4094
- //
4095
- // return;
4096
- // }
4097
- //#endif
4098
 
4099
  if (params->type == GGML_TASK_INIT) {
4100
  if (nb01 >= nb00) {
@@ -4301,7 +4301,7 @@ void ggml_compute_forward_mul_mat_f16_f32(
4301
  // nb00 < nb01 - src0 is transposed
4302
  // compute by src0 columns
4303
 
4304
- #ifdef GGML_USE_ACCELERATE
4305
  if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
4306
  GGML_ASSERT(nb10 == sizeof(float));
4307
 
@@ -6857,7 +6857,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
6857
  } else {
6858
  if (node->src0->type == GGML_TYPE_F16 &&
6859
  node->src1->type == GGML_TYPE_F32) {
6860
- #ifdef GGML_USE_ACCELERATE
6861
  if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
6862
  cur = sizeof(float)*(node->src0->ne[0]*node->src0->ne[1]);
6863
  } else {
@@ -8074,7 +8074,7 @@ int ggml_cpu_has_wasm_simd(void) {
8074
  }
8075
 
8076
  int ggml_cpu_has_blas(void) {
8077
- #if defined(GGML_USE_BLAS) || defined(GGML_USE_ACCELERATE)
8078
  return 1;
8079
  #else
8080
  return 0;
 
76
 
77
  #ifdef GGML_USE_ACCELERATE
78
  #include <Accelerate/Accelerate.h>
79
+ #elif GGML_USE_OPENBLAS
80
+ #include <cblas.h>
81
  #endif
82
 
83
  // floating point type used to accumulate sums
 
4057
  // nb00 < nb01 - src0 is transposed
4058
  // compute by src0 columns
4059
 
4060
+ #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
4061
+ if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
4062
+ GGML_ASSERT(ggml_is_contiguous(src0));
4063
+ GGML_ASSERT(nb10 == sizeof(float));
4064
+
4065
+ if (params->ith != 0) return;
4066
+
4067
+ if (params->type == GGML_TASK_INIT) {
4068
+ return;
4069
+ }
4070
+
4071
+ if (params->type == GGML_TASK_FINALIZE) {
4072
+ return;
4073
+ }
4074
+
4075
+ for (int i03 = 0; i03 < ne03; i03++) {
4076
+ for (int i02 = 0; i02 < ne02; i02++) {
4077
+ const float * x = (float *) (src0->data);
4078
+ const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13);
4079
+
4080
+ float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
4081
+
4082
+ // zT = y * xT
4083
+ {
4084
+ cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
4085
+ ne11, ne01, ne10,
4086
+ 1.0f, y, ne10,
4087
+ x, ne10,
4088
+ 0.0f, d, ne01);
4089
+ }
4090
+ }
4091
+ }
4092
+
4093
+ //printf("CBLAS F32 = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
4094
+
4095
+ return;
4096
+ }
4097
+ #endif
 
 
4098
 
4099
  if (params->type == GGML_TASK_INIT) {
4100
  if (nb01 >= nb00) {
 
4301
  // nb00 < nb01 - src0 is transposed
4302
  // compute by src0 columns
4303
 
4304
+ #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
4305
  if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
4306
  GGML_ASSERT(nb10 == sizeof(float));
4307
 
 
6857
  } else {
6858
  if (node->src0->type == GGML_TYPE_F16 &&
6859
  node->src1->type == GGML_TYPE_F32) {
6860
+ #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
6861
  if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
6862
  cur = sizeof(float)*(node->src0->ne[0]*node->src0->ne[1]);
6863
  } else {
 
8074
  }
8075
 
8076
  int ggml_cpu_has_blas(void) {
8077
+ #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
8078
  return 1;
8079
  #else
8080
  return 0;