slaren committed on
Commit
2645c33
·
unverified ·
1 Parent(s): 0c9c434

ggml : limit n_threads to the max n_tasks (llama/5238)

Browse files
Files changed (1) hide show
  1. ggml.c +5 -1
ggml.c CHANGED
@@ -16985,12 +16985,16 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
16985
  struct ggml_cplan cplan;
16986
  memset(&cplan, 0, sizeof(struct ggml_cplan));
16987
 
 
 
16988
  // thread scheduling for the different operations + work buffer size estimation
16989
  for (int i = 0; i < cgraph->n_nodes; i++) {
16990
  struct ggml_tensor * node = cgraph->nodes[i];
16991
 
16992
  const int n_tasks = ggml_get_n_tasks(node, n_threads);
16993
 
 
 
16994
  size_t cur = 0;
16995
 
16996
  switch (node->op) {
@@ -17157,7 +17161,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
17157
  work_size += CACHE_LINE_SIZE*(n_threads - 1);
17158
  }
17159
 
17160
- cplan.n_threads = n_threads;
17161
  cplan.work_size = work_size;
17162
  cplan.work_data = NULL;
17163
 
 
16985
  struct ggml_cplan cplan;
16986
  memset(&cplan, 0, sizeof(struct ggml_cplan));
16987
 
16988
+ int max_tasks = 1;
16989
+
16990
  // thread scheduling for the different operations + work buffer size estimation
16991
  for (int i = 0; i < cgraph->n_nodes; i++) {
16992
  struct ggml_tensor * node = cgraph->nodes[i];
16993
 
16994
  const int n_tasks = ggml_get_n_tasks(node, n_threads);
16995
 
16996
+ max_tasks = MAX(max_tasks, n_tasks);
16997
+
16998
  size_t cur = 0;
16999
 
17000
  switch (node->op) {
 
17161
  work_size += CACHE_LINE_SIZE*(n_threads - 1);
17162
  }
17163
 
17164
+ cplan.n_threads = MIN(max_tasks, n_threads);
17165
  cplan.work_size = work_size;
17166
  cplan.work_data = NULL;
17167