Spaces:
Sleeping
Sleeping
Michael Podvitskiy
committed on
ggml : fix `error C2078: too many initializers` for MSVC ARM64 (llama/5404)
Browse files - ggml-quants.c +15 -4
ggml-quants.c
CHANGED
|
@@ -268,6 +268,17 @@ static inline float hsum_float_4x4(const __m128 a, const __m128 b, const __m128
|
|
| 268 |
#endif // defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__)
|
| 269 |
|
| 270 |
#if defined(__ARM_NEON)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
#if !defined(__aarch64__)
|
| 272 |
|
| 273 |
// 64-bit compatibility
|
|
@@ -8698,10 +8709,10 @@ void ggml_vec_dot_iq3_xxs_q8_K(const int n, float * restrict s, const void * res
|
|
| 8698 |
for (int ib32 = 0; ib32 < QK_K/32; ib32 += 2) {
|
| 8699 |
q8b = ggml_vld1q_s8_x4(q8); q8 += 64;
|
| 8700 |
memcpy(aux32, gas, 2*sizeof(uint32_t)); gas += 2*sizeof(uint32_t);
|
| 8701 |
-
const uint32x4_t aux32x4_0 =
|
| 8702 |
-
const uint32x4_t aux32x4_1 =
|
| 8703 |
-
const uint32x4_t aux32x4_2 =
|
| 8704 |
-
const uint32x4_t aux32x4_3 =
|
| 8705 |
q3 += 16;
|
| 8706 |
q3s.val[0] = vcombine_s8(vld1_s8((const void *)(signs64 + ((aux32[0] >> 0) & 127))), vld1_s8((const void *)(signs64 + ((aux32[0] >> 7) & 127))));
|
| 8707 |
q3s.val[1] = vcombine_s8(vld1_s8((const void *)(signs64 + ((aux32[0] >> 14) & 127))), vld1_s8((const void *)(signs64 + ((aux32[0] >> 21) & 127))));
|
|
|
|
| 268 |
#endif // defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__)
|
| 269 |
|
| 270 |
#if defined(__ARM_NEON)
|
| 271 |
+
|
| 272 |
+
#ifdef _MSC_VER
|
| 273 |
+
|
| 274 |
+
#define ggml_vld1q_u32(w,x,y,z) { ((w) + ((uint64_t)(x) << 32)), ((y) + ((uint64_t)(z) << 32)) }
|
| 275 |
+
|
| 276 |
+
#else
|
| 277 |
+
|
| 278 |
+
#define ggml_vld1q_u32(w,x,y,z) { (w), (x), (y), (z) }
|
| 279 |
+
|
| 280 |
+
#endif
|
| 281 |
+
|
| 282 |
#if !defined(__aarch64__)
|
| 283 |
|
| 284 |
// 64-bit compatibility
|
|
|
|
| 8709 |
for (int ib32 = 0; ib32 < QK_K/32; ib32 += 2) {
|
| 8710 |
q8b = ggml_vld1q_s8_x4(q8); q8 += 64;
|
| 8711 |
memcpy(aux32, gas, 2*sizeof(uint32_t)); gas += 2*sizeof(uint32_t);
|
| 8712 |
+
const uint32x4_t aux32x4_0 = ggml_vld1q_u32(iq3xxs_grid[q3[ 0]], iq3xxs_grid[q3[ 1]], iq3xxs_grid[q3[ 2]], iq3xxs_grid[q3[ 3]]);
|
| 8713 |
+
const uint32x4_t aux32x4_1 = ggml_vld1q_u32(iq3xxs_grid[q3[ 4]], iq3xxs_grid[q3[ 5]], iq3xxs_grid[q3[ 6]], iq3xxs_grid[q3[ 7]]);
|
| 8714 |
+
const uint32x4_t aux32x4_2 = ggml_vld1q_u32(iq3xxs_grid[q3[ 8]], iq3xxs_grid[q3[ 9]], iq3xxs_grid[q3[10]], iq3xxs_grid[q3[11]]);
|
| 8715 |
+
const uint32x4_t aux32x4_3 = ggml_vld1q_u32(iq3xxs_grid[q3[12]], iq3xxs_grid[q3[13]], iq3xxs_grid[q3[14]], iq3xxs_grid[q3[15]]);
|
| 8716 |
q3 += 16;
|
| 8717 |
q3s.val[0] = vcombine_s8(vld1_s8((const void *)(signs64 + ((aux32[0] >> 0) & 127))), vld1_s8((const void *)(signs64 + ((aux32[0] >> 7) & 127))));
|
| 8718 |
q3s.val[1] = vcombine_s8(vld1_s8((const void *)(signs64 + ((aux32[0] >> 14) & 127))), vld1_s8((const void *)(signs64 + ((aux32[0] >> 21) & 127))));
|