ggerganov committed on
Commit
7551157
·
unverified ·
1 Parent(s): 096caf3

ggml : 32-bit arm compat (#1891)

Browse files

* ggml : 32-bit arm compat

* ggml : add ggml_vqtbl1q_s8 impl

* ggml : cont

examples/whisper.android/lib/src/main/jni/whisper/CMakeLists.txt CHANGED
@@ -9,10 +9,10 @@ set(WHISPER_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../../../../..)
9
  option(GGML_HOME "whisper: Path to external GGML source" OFF)
10
 
11
  set(
12
- SOURCE_FILES
13
- ${WHISPER_LIB_DIR}/whisper.cpp
14
- ${CMAKE_SOURCE_DIR}/jni.c
15
- )
16
 
17
  if (NOT GGML_HOME)
18
  set(
@@ -22,8 +22,7 @@ if (NOT GGML_HOME)
22
  ${WHISPER_LIB_DIR}/ggml-alloc.c
23
  ${WHISPER_LIB_DIR}/ggml-backend.c
24
  ${WHISPER_LIB_DIR}/ggml-quants.c
25
-
26
- )
27
  endif()
28
 
29
  find_library(LOG_LIB log)
@@ -44,7 +43,6 @@ function(build_library target_name)
44
  endif ()
45
 
46
  if (NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
47
-
48
  target_compile_options(${target_name} PRIVATE -O3)
49
  target_compile_options(${target_name} PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden)
50
  target_compile_options(${target_name} PRIVATE -ffunction-sections -fdata-sections)
@@ -52,7 +50,6 @@ function(build_library target_name)
52
  target_link_options(${target_name} PRIVATE -Wl,--gc-sections)
53
  target_link_options(${target_name} PRIVATE -Wl,--exclude-libs,ALL)
54
  target_link_options(${target_name} PRIVATE -flto)
55
-
56
  endif ()
57
 
58
  if (GGML_HOME)
 
9
  option(GGML_HOME "whisper: Path to external GGML source" OFF)
10
 
11
  set(
12
+ SOURCE_FILES
13
+ ${WHISPER_LIB_DIR}/whisper.cpp
14
+ ${CMAKE_SOURCE_DIR}/jni.c
15
+ )
16
 
17
  if (NOT GGML_HOME)
18
  set(
 
22
  ${WHISPER_LIB_DIR}/ggml-alloc.c
23
  ${WHISPER_LIB_DIR}/ggml-backend.c
24
  ${WHISPER_LIB_DIR}/ggml-quants.c
25
+ )
 
26
  endif()
27
 
28
  find_library(LOG_LIB log)
 
43
  endif ()
44
 
45
  if (NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
 
46
  target_compile_options(${target_name} PRIVATE -O3)
47
  target_compile_options(${target_name} PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden)
48
  target_compile_options(${target_name} PRIVATE -ffunction-sections -fdata-sections)
 
50
  target_link_options(${target_name} PRIVATE -Wl,--gc-sections)
51
  target_link_options(${target_name} PRIVATE -Wl,--exclude-libs,ALL)
52
  target_link_options(${target_name} PRIVATE -flto)
 
53
  endif ()
54
 
55
  if (GGML_HOME)
ggml-quants.c CHANGED
@@ -438,6 +438,30 @@ inline static ggml_int8x16x4_t ggml_vld1q_s8_x4(const int8_t * ptr) {
438
  return res;
439
  }
440
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
441
  #else
442
 
443
  #define ggml_int16x8x2_t int16x8x2_t
@@ -451,6 +475,7 @@ inline static ggml_int8x16x4_t ggml_vld1q_s8_x4(const int8_t * ptr) {
451
  #define ggml_vld1q_u8_x4 vld1q_u8_x4
452
  #define ggml_vld1q_s8_x2 vld1q_s8_x2
453
  #define ggml_vld1q_s8_x4 vld1q_s8_x4
 
454
 
455
  #endif
456
 
@@ -9333,7 +9358,7 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const
9333
  uint16_t gindex[8];
9334
  uint16x8x2_t vindex;
9335
  int8x16x4_t q1b;
9336
- int8x16x4_t q8b;
9337
  uint16x8x4_t scales;
9338
  int32x4x2_t sumi;
9339
  int32x4x2_t dotq;
@@ -9506,10 +9531,10 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * restrict s, size_t bs, const void *
9506
  q8b.val[2] = vld1q_s8(y[ib+1].qs);
9507
  q8b.val[3] = vld1q_s8(y[ib+1].qs + 16);
9508
 
9509
- q4b.val[0] = vqtbl1q_s8(values, vandq_u8(q4bits.val[0], m4b));
9510
- q4b.val[1] = vqtbl1q_s8(values, vshrq_n_u8(q4bits.val[0], 4));
9511
- q4b.val[2] = vqtbl1q_s8(values, vandq_u8(q4bits.val[1], m4b));
9512
- q4b.val[3] = vqtbl1q_s8(values, vshrq_n_u8(q4bits.val[1], 4));
9513
 
9514
  prod_1 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q4b.val[0], q8b.val[0]), q4b.val[1], q8b.val[1]);
9515
  prod_2 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q4b.val[2], q8b.val[2]), q4b.val[3], q8b.val[3]);
 
438
  return res;
439
  }
440
 
441
+ // NOTE: not tested. Scalar stand-in for vqtbl1q_s8 (the other branch aliases ggml_vqtbl1q_s8 to that intrinsic): each of the 16 result lanes is res[i] = a[b[i]], i.e. `a` is the lookup table and `b` holds the per-lane indices. There is no range check on b[i], so every index must be < 16 (presumably the intrinsic yields 0 for larger indices -- confirm against the Arm reference before relying on that case).
442
+ inline static int8x16_t ggml_vqtbl1q_s8(int8x16_t a, uint8x16_t b) {
443
+ int8x16_t res;
444
+
445
+ res[ 0] = a[b[ 0]];
446
+ res[ 1] = a[b[ 1]];
447
+ res[ 2] = a[b[ 2]];
448
+ res[ 3] = a[b[ 3]];
449
+ res[ 4] = a[b[ 4]];
450
+ res[ 5] = a[b[ 5]];
451
+ res[ 6] = a[b[ 6]];
452
+ res[ 7] = a[b[ 7]];
453
+ res[ 8] = a[b[ 8]];
454
+ res[ 9] = a[b[ 9]];
455
+ res[10] = a[b[10]];
456
+ res[11] = a[b[11]];
457
+ res[12] = a[b[12]];
458
+ res[13] = a[b[13]];
459
+ res[14] = a[b[14]];
460
+ res[15] = a[b[15]];
461
+
462
+ return res;
463
+ }
464
+
465
  #else
466
 
467
  #define ggml_int16x8x2_t int16x8x2_t
 
475
  #define ggml_vld1q_u8_x4 vld1q_u8_x4
476
  #define ggml_vld1q_s8_x2 vld1q_s8_x2
477
  #define ggml_vld1q_s8_x4 vld1q_s8_x4
478
+ #define ggml_vqtbl1q_s8 vqtbl1q_s8
479
 
480
  #endif
481
 
 
9358
  uint16_t gindex[8];
9359
  uint16x8x2_t vindex;
9360
  int8x16x4_t q1b;
9361
+ ggml_int8x16x4_t q8b;
9362
  uint16x8x4_t scales;
9363
  int32x4x2_t sumi;
9364
  int32x4x2_t dotq;
 
9531
  q8b.val[2] = vld1q_s8(y[ib+1].qs);
9532
  q8b.val[3] = vld1q_s8(y[ib+1].qs + 16);
9533
 
9534
+ q4b.val[0] = ggml_vqtbl1q_s8(values, vandq_u8 (q4bits.val[0], m4b));
9535
+ q4b.val[1] = ggml_vqtbl1q_s8(values, vshrq_n_u8(q4bits.val[0], 4));
9536
+ q4b.val[2] = ggml_vqtbl1q_s8(values, vandq_u8 (q4bits.val[1], m4b));
9537
+ q4b.val[3] = ggml_vqtbl1q_s8(values, vshrq_n_u8(q4bits.val[1], 4));
9538
 
9539
  prod_1 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q4b.val[0], q8b.val[0]), q4b.val[1], q8b.val[1]);
9540
  prod_2 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q4b.val[2], q8b.val[2]), q4b.val[3], q8b.val[3]);