ggerganov committed on
Commit
df78c25
·
1 Parent(s): d25c1e3

ggml : try fix ppc64 (#0)

Browse files
Files changed (2) hide show
  1. ggml-quants.c +1 -1
  2. ggml.c +2 -0
ggml-quants.c CHANGED
@@ -11425,7 +11425,7 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * restrict s, size_t bs, const void
11425
 
11426
  vector signed short qxh = (vector signed short)vec_sld(vec_splats(qh[1]), vec_splats(qh[0]), 8);
11427
  qh += 2;
11428
- vector bool short vsel = vec_cmpge(qxh, (vector signed short)v0);
11429
 
11430
  vector signed short q8ysum = vec_sel((vector signed short)vec_xor((vector unsigned short)q8ysums, vsign), q8ysums, vsel);
11431
 
 
11425
 
11426
  vector signed short qxh = (vector signed short)vec_sld(vec_splats(qh[1]), vec_splats(qh[0]), 8);
11427
  qh += 2;
11428
+ vector __bool short vsel = vec_cmpge(qxh, (vector signed short)v0);
11429
 
11430
  vector signed short q8ysum = vec_sel((vector signed short)vec_xor((vector unsigned short)q8ysums, vsign), q8ysums, vsel);
11431
 
ggml.c CHANGED
@@ -1306,6 +1306,8 @@ static inline void __avx_f32cx8_store(ggml_fp16_t *x, __m256 y) {
1306
  #define GGML_F16_VEC_ZERO GGML_F32x4_ZERO
1307
  #define GGML_F16_VEC_SET1 GGML_F32x4_SET1
1308
  #define GGML_F16_VEC_FMA GGML_F32x4_FMA
 
 
1309
  #define GGML_F16_VEC_REDUCE GGML_F32x4_REDUCE
1310
  // Use vec_xl, not vec_ld, in case the load address is not aligned.
1311
  #define GGML_F16_VEC_LOAD(p, i) (i & 0x1) ? \
 
1306
  #define GGML_F16_VEC_ZERO GGML_F32x4_ZERO
1307
  #define GGML_F16_VEC_SET1 GGML_F32x4_SET1
1308
  #define GGML_F16_VEC_FMA GGML_F32x4_FMA
1309
+ #define GGML_F16_VEC_ADD GGML_F32x4_ADD
1310
+ #define GGML_F16_VEC_MUL GGML_F32x4_MUL
1311
  #define GGML_F16_VEC_REDUCE GGML_F32x4_REDUCE
1312
  // Use vec_xl, not vec_ld, in case the load address is not aligned.
1313
  #define GGML_F16_VEC_LOAD(p, i) (i & 0x1) ? \