Spaces:
Sleeping
Sleeping
ggml : 32-bit arm compat (#1891)
Browse files
* ggml : 32-bit arm compat
* ggml : add ggml_vqtbl1q_s8 impl
* ggml : cont
examples/whisper.android/lib/src/main/jni/whisper/CMakeLists.txt
CHANGED
|
@@ -9,10 +9,10 @@ set(WHISPER_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../../../../..)
|
|
| 9 |
option(GGML_HOME "whisper: Path to external GGML source" OFF)
|
| 10 |
|
| 11 |
set(
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
)
|
| 16 |
|
| 17 |
if (NOT GGML_HOME)
|
| 18 |
set(
|
|
@@ -22,8 +22,7 @@ if (NOT GGML_HOME)
|
|
| 22 |
${WHISPER_LIB_DIR}/ggml-alloc.c
|
| 23 |
${WHISPER_LIB_DIR}/ggml-backend.c
|
| 24 |
${WHISPER_LIB_DIR}/ggml-quants.c
|
| 25 |
-
|
| 26 |
-
)
|
| 27 |
endif()
|
| 28 |
|
| 29 |
find_library(LOG_LIB log)
|
|
@@ -44,7 +43,6 @@ function(build_library target_name)
|
|
| 44 |
endif ()
|
| 45 |
|
| 46 |
if (NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
|
| 47 |
-
|
| 48 |
target_compile_options(${target_name} PRIVATE -O3)
|
| 49 |
target_compile_options(${target_name} PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden)
|
| 50 |
target_compile_options(${target_name} PRIVATE -ffunction-sections -fdata-sections)
|
|
@@ -52,7 +50,6 @@ function(build_library target_name)
|
|
| 52 |
target_link_options(${target_name} PRIVATE -Wl,--gc-sections)
|
| 53 |
target_link_options(${target_name} PRIVATE -Wl,--exclude-libs,ALL)
|
| 54 |
target_link_options(${target_name} PRIVATE -flto)
|
| 55 |
-
|
| 56 |
endif ()
|
| 57 |
|
| 58 |
if (GGML_HOME)
|
|
|
|
| 9 |
option(GGML_HOME "whisper: Path to external GGML source" OFF)
|
| 10 |
|
| 11 |
set(
|
| 12 |
+
SOURCE_FILES
|
| 13 |
+
${WHISPER_LIB_DIR}/whisper.cpp
|
| 14 |
+
${CMAKE_SOURCE_DIR}/jni.c
|
| 15 |
+
)
|
| 16 |
|
| 17 |
if (NOT GGML_HOME)
|
| 18 |
set(
|
|
|
|
| 22 |
${WHISPER_LIB_DIR}/ggml-alloc.c
|
| 23 |
${WHISPER_LIB_DIR}/ggml-backend.c
|
| 24 |
${WHISPER_LIB_DIR}/ggml-quants.c
|
| 25 |
+
)
|
|
|
|
| 26 |
endif()
|
| 27 |
|
| 28 |
find_library(LOG_LIB log)
|
|
|
|
| 43 |
endif ()
|
| 44 |
|
| 45 |
if (NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
|
|
|
|
| 46 |
target_compile_options(${target_name} PRIVATE -O3)
|
| 47 |
target_compile_options(${target_name} PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden)
|
| 48 |
target_compile_options(${target_name} PRIVATE -ffunction-sections -fdata-sections)
|
|
|
|
| 50 |
target_link_options(${target_name} PRIVATE -Wl,--gc-sections)
|
| 51 |
target_link_options(${target_name} PRIVATE -Wl,--exclude-libs,ALL)
|
| 52 |
target_link_options(${target_name} PRIVATE -flto)
|
|
|
|
| 53 |
endif ()
|
| 54 |
|
| 55 |
if (GGML_HOME)
|
ggml-quants.c
CHANGED
|
@@ -438,6 +438,30 @@ inline static ggml_int8x16x4_t ggml_vld1q_s8_x4(const int8_t * ptr) {
|
|
| 438 |
return res;
|
| 439 |
}
|
| 440 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
#else
|
| 442 |
|
| 443 |
#define ggml_int16x8x2_t int16x8x2_t
|
|
@@ -451,6 +475,7 @@ inline static ggml_int8x16x4_t ggml_vld1q_s8_x4(const int8_t * ptr) {
|
|
| 451 |
#define ggml_vld1q_u8_x4 vld1q_u8_x4
|
| 452 |
#define ggml_vld1q_s8_x2 vld1q_s8_x2
|
| 453 |
#define ggml_vld1q_s8_x4 vld1q_s8_x4
|
|
|
|
| 454 |
|
| 455 |
#endif
|
| 456 |
|
|
@@ -9333,7 +9358,7 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const
|
|
| 9333 |
uint16_t gindex[8];
|
| 9334 |
uint16x8x2_t vindex;
|
| 9335 |
int8x16x4_t q1b;
|
| 9336 |
-
|
| 9337 |
uint16x8x4_t scales;
|
| 9338 |
int32x4x2_t sumi;
|
| 9339 |
int32x4x2_t dotq;
|
|
@@ -9506,10 +9531,10 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * restrict s, size_t bs, const void *
|
|
| 9506 |
q8b.val[2] = vld1q_s8(y[ib+1].qs);
|
| 9507 |
q8b.val[3] = vld1q_s8(y[ib+1].qs + 16);
|
| 9508 |
|
| 9509 |
-
q4b.val[0] =
|
| 9510 |
-
q4b.val[1] =
|
| 9511 |
-
q4b.val[2] =
|
| 9512 |
-
q4b.val[3] =
|
| 9513 |
|
| 9514 |
prod_1 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q4b.val[0], q8b.val[0]), q4b.val[1], q8b.val[1]);
|
| 9515 |
prod_2 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q4b.val[2], q8b.val[2]), q4b.val[3], q8b.val[3]);
|
|
|
|
| 438 |
return res;
|
| 439 |
}
|
| 440 |
|
| 441 |
+
// NOTE: not tested
|
| 442 |
+
inline static int8x16_t ggml_vqtbl1q_s8(int8x16_t a, uint8x16_t b) {
|
| 443 |
+
int8x16_t res;
|
| 444 |
+
|
| 445 |
+
res[ 0] = a[b[ 0]];
|
| 446 |
+
res[ 1] = a[b[ 1]];
|
| 447 |
+
res[ 2] = a[b[ 2]];
|
| 448 |
+
res[ 3] = a[b[ 3]];
|
| 449 |
+
res[ 4] = a[b[ 4]];
|
| 450 |
+
res[ 5] = a[b[ 5]];
|
| 451 |
+
res[ 6] = a[b[ 6]];
|
| 452 |
+
res[ 7] = a[b[ 7]];
|
| 453 |
+
res[ 8] = a[b[ 8]];
|
| 454 |
+
res[ 9] = a[b[ 9]];
|
| 455 |
+
res[10] = a[b[10]];
|
| 456 |
+
res[11] = a[b[11]];
|
| 457 |
+
res[12] = a[b[12]];
|
| 458 |
+
res[13] = a[b[13]];
|
| 459 |
+
res[14] = a[b[14]];
|
| 460 |
+
res[15] = a[b[15]];
|
| 461 |
+
|
| 462 |
+
return res;
|
| 463 |
+
}
|
| 464 |
+
|
| 465 |
#else
|
| 466 |
|
| 467 |
#define ggml_int16x8x2_t int16x8x2_t
|
|
|
|
| 475 |
#define ggml_vld1q_u8_x4 vld1q_u8_x4
|
| 476 |
#define ggml_vld1q_s8_x2 vld1q_s8_x2
|
| 477 |
#define ggml_vld1q_s8_x4 vld1q_s8_x4
|
| 478 |
+
#define ggml_vqtbl1q_s8 vqtbl1q_s8
|
| 479 |
|
| 480 |
#endif
|
| 481 |
|
|
|
|
| 9358 |
uint16_t gindex[8];
|
| 9359 |
uint16x8x2_t vindex;
|
| 9360 |
int8x16x4_t q1b;
|
| 9361 |
+
ggml_int8x16x4_t q8b;
|
| 9362 |
uint16x8x4_t scales;
|
| 9363 |
int32x4x2_t sumi;
|
| 9364 |
int32x4x2_t dotq;
|
|
|
|
| 9531 |
q8b.val[2] = vld1q_s8(y[ib+1].qs);
|
| 9532 |
q8b.val[3] = vld1q_s8(y[ib+1].qs + 16);
|
| 9533 |
|
| 9534 |
+
q4b.val[0] = ggml_vqtbl1q_s8(values, vandq_u8 (q4bits.val[0], m4b));
|
| 9535 |
+
q4b.val[1] = ggml_vqtbl1q_s8(values, vshrq_n_u8(q4bits.val[0], 4));
|
| 9536 |
+
q4b.val[2] = ggml_vqtbl1q_s8(values, vandq_u8 (q4bits.val[1], m4b));
|
| 9537 |
+
q4b.val[3] = ggml_vqtbl1q_s8(values, vshrq_n_u8(q4bits.val[1], 4));
|
| 9538 |
|
| 9539 |
prod_1 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q4b.val[0], q8b.val[0]), q4b.val[1], q8b.val[1]);
|
| 9540 |
prod_2 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q4b.val[2], q8b.val[2]), q4b.val[3], q8b.val[3]);
|