ggerganov committed
Commit 9705bb5 · 1 Parent(s): 0d2e888

repo : update links to new url (llama/11886)


* repo : update links to new url

ggml-ci

* cont : more urls

ggml-ci

ggml/include/ggml-cpu.h CHANGED
@@ -8,7 +8,7 @@ extern "C" {
 #endif
 
     // the compute plan that needs to be prepared for ggml_graph_compute()
-    // since https://github.com/ggerganov/ggml/issues/287
+    // since https://github.com/ggml-org/ggml/issues/287
     struct ggml_cplan {
         size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
         uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
ggml/include/ggml-metal.h CHANGED
@@ -45,7 +45,7 @@ GGML_BACKEND_API bool ggml_backend_is_metal(ggml_backend_t backend);
 
 GGML_DEPRECATED(
         GGML_BACKEND_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size),
-        "obsoleted by the new device interface - https://github.com/ggerganov/llama.cpp/pull/9713");
+        "obsoleted by the new device interface - https://github.com/ggml-org/llama.cpp/pull/9713");
 
 GGML_BACKEND_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
 
ggml/src/ggml-cpu/ggml-cpu.c CHANGED
@@ -1816,7 +1816,7 @@ inline static float ggml_silu_f32(float x) {
 
 #if __FINITE_MATH_ONLY__
 #error "some routines in ggml.c require non-finite math arithmetics -- pass -fno-finite-math-only to the compiler to fix"
-#error "ref: https://github.com/ggerganov/llama.cpp/pull/7154#issuecomment-2143844461"
+#error "ref: https://github.com/ggml-org/llama.cpp/pull/7154#issuecomment-2143844461"
 #endif
 
 #if defined(__ARM_NEON) && defined(__aarch64__)
@@ -7574,7 +7574,7 @@ UseGgmlGemm2:;
     int64_t nchunk1 = (nr1 + chunk_size - 1) / chunk_size;
 
     // If the chunking is poor for the number of threads on this setup, scrap the whole plan. Re-chunk it by thread.
-    // Also, chunking by thread was measured to have perform better on NUMA systems. See https://github.com/ggerganov/llama.cpp/pull/6915
+    // Also, chunking by thread was measured to have perform better on NUMA systems. See https://github.com/ggml-org/llama.cpp/pull/6915
     // In theory, chunking should be just as useful on NUMA and non NUMA systems, but testing disagreed with that.
     if (nchunk0 * nchunk1 < nth * 4 || ggml_is_numa()) {
         // distribute the thread work across the inner or outer loop based on which one is larger
ggml/src/ggml-metal/ggml-metal.m CHANGED
@@ -1983,7 +1983,7 @@ static void ggml_metal_encode_node(
         const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2);
 
         // TODO: add ggml_metal_kargs struct
-        // TODO: optimize (see https://github.com/ggerganov/llama.cpp/pull/10238/commits/7941b6b9ec29a2866fec6fa6c51612515ca509f6)
+        // TODO: optimize (see https://github.com/ggml-org/llama.cpp/pull/10238/commits/7941b6b9ec29a2866fec6fa6c51612515ca509f6)
         [encoder setComputePipelineState:pipeline];
         [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
         if (id_src1) {
ggml/src/ggml-metal/ggml-metal.metal CHANGED
@@ -1058,7 +1058,7 @@ kernel void kernel_soft_max(
     }
 
     // This barrier fixes a failing test
-    // ref: https://github.com/ggerganov/ggml/pull/621#discussion_r1425156335
+    // ref: https://github.com/ggml-org/ggml/pull/621#discussion_r1425156335
     threadgroup_barrier(mem_flags::mem_none);
 
     float sum = simd_sum(lsum);
@@ -1163,7 +1163,7 @@ kernel void kernel_soft_max_4(
     const float lsum = lsum4[0] + lsum4[1] + lsum4[2] + lsum4[3];
 
     // This barrier fixes a failing test
-    // ref: https://github.com/ggerganov/ggml/pull/621#discussion_r1425156335
+    // ref: https://github.com/ggml-org/ggml/pull/621#discussion_r1425156335
     threadgroup_barrier(mem_flags::mem_none);
 
     float sum = simd_sum(lsum);