ggerganov committed on
Commit
d692b06
·
1 Parent(s): 192bda4

ggml : resolve merge (ggml/0)

Browse files
Files changed (3) hide show
  1. examples/common-ggml.cpp +2 -0
  2. ggml-metal.metal +3 -3
  3. ggml.c +4 -1
examples/common-ggml.cpp CHANGED
@@ -71,6 +71,7 @@ bool ggml_common_quantize_0(
71
  case GGML_FTYPE_MOSTLY_IQ4_NL:
72
  case GGML_FTYPE_MOSTLY_IQ4_XS:
73
  case GGML_FTYPE_MOSTLY_IQ1_M:
 
74
  {
75
  fprintf(stderr, "%s: invalid model type %d\n", __func__, ftype);
76
  return false;
@@ -207,6 +208,7 @@ bool ggml_common_quantize_0(
207
  case GGML_TYPE_IQ4_NL:
208
  case GGML_TYPE_IQ4_XS:
209
  case GGML_TYPE_IQ1_M:
 
210
  case GGML_TYPE_COUNT:
211
  {
212
  fprintf(stderr, "%s: unsupported quantization type %d (%s)\n", __func__, ttype, ggml_type_name((ggml_type) ttype));
 
71
  case GGML_FTYPE_MOSTLY_IQ4_NL:
72
  case GGML_FTYPE_MOSTLY_IQ4_XS:
73
  case GGML_FTYPE_MOSTLY_IQ1_M:
74
+ case GGML_FTYPE_MOSTLY_BF16:
75
  {
76
  fprintf(stderr, "%s: invalid model type %d\n", __func__, ftype);
77
  return false;
 
208
  case GGML_TYPE_IQ4_NL:
209
  case GGML_TYPE_IQ4_XS:
210
  case GGML_TYPE_IQ1_M:
211
+ case GGML_TYPE_BF16:
212
  case GGML_TYPE_COUNT:
213
  {
214
  fprintf(stderr, "%s: unsupported quantization type %d (%s)\n", __func__, ttype, ggml_type_name((ggml_type) ttype));
ggml-metal.metal CHANGED
@@ -296,7 +296,7 @@ kernel void kernel_silu(
296
  dst[tpig] = x / (1.0f + exp(-x));
297
  }
298
 
299
- +kernel void kernel_silu_4(
300
  device const float4 * src0,
301
  device float4 * dst,
302
  uint tpig[[thread_position_in_grid]]) {
@@ -2217,7 +2217,7 @@ kernel void kernel_flash_attn_ext_f16(
2217
 
2218
  // ALiBi
2219
  if (max_bias > 0.0f) {
2220
- const short h = iq2;
2221
 
2222
  const float base = h < n_head_log2 ? m0 : m1;
2223
  const int exph = h < n_head_log2 ? h + 1 : 2*(h - n_head_log2) + 1;
@@ -2473,7 +2473,7 @@ kernel void kernel_flash_attn_ext_vec_f16(
2473
 
2474
  // ALiBi
2475
  if (max_bias > 0.0f) {
2476
- const short h = iq2;
2477
 
2478
  const float base = h < n_head_log2 ? m0 : m1;
2479
  const int exp = h < n_head_log2 ? h + 1 : 2*(h - n_head_log2) + 1;
 
296
  dst[tpig] = x / (1.0f + exp(-x));
297
  }
298
 
299
+ kernel void kernel_silu_4(
300
  device const float4 * src0,
301
  device float4 * dst,
302
  uint tpig[[thread_position_in_grid]]) {
 
2217
 
2218
  // ALiBi
2219
  if (max_bias > 0.0f) {
2220
+ const uint32_t h = iq2;
2221
 
2222
  const float base = h < n_head_log2 ? m0 : m1;
2223
  const int exph = h < n_head_log2 ? h + 1 : 2*(h - n_head_log2) + 1;
 
2473
 
2474
  // ALiBi
2475
  if (max_bias > 0.0f) {
2476
+ const uint32_t h = iq2;
2477
 
2478
  const float base = h < n_head_log2 ? m0 : m1;
2479
  const int exp = h < n_head_log2 ? h + 1 : 2*(h - n_head_log2) + 1;
ggml.c CHANGED
@@ -4,7 +4,6 @@
4
  #include "ggml-impl.h"
5
  #include "ggml-quants.h"
6
  #include "ggml.h"
7
- #include "sgemm.h"
8
 
9
  #if defined(_MSC_VER) || defined(__MINGW32__)
10
  #include <malloc.h> // using malloc.h with MSC/MINGW
@@ -37,6 +36,10 @@
37
  #undef GGML_USE_LLAMAFILE
38
  #endif
39
 
 
 
 
 
40
  #if defined(_MSC_VER)
41
  // disable "possible loss of data" to avoid hundreds of casts
42
  // we should just be careful :)
 
4
  #include "ggml-impl.h"
5
  #include "ggml-quants.h"
6
  #include "ggml.h"
 
7
 
8
  #if defined(_MSC_VER) || defined(__MINGW32__)
9
  #include <malloc.h> // using malloc.h with MSC/MINGW
 
36
  #undef GGML_USE_LLAMAFILE
37
  #endif
38
 
39
+ #ifdef GGML_USE_LLAMAFILE
40
+ #include "sgemm.h"
41
+ #endif
42
+
43
  #if defined(_MSC_VER)
44
  // disable "possible loss of data" to avoid hundreds of casts
45
  // we should just be careful :)