slaren committed on
Commit
a512417
·
unverified ·
1 Parent(s): aa42df9

ggml-alloc : allocate all leafs as if they were inputs (ggml/731)

Browse files

* ggml-alloc : allocate all leafs as if they were inputs

* ensure static leafs are allocated

* gpt-2-backend : remove unnecessary ggml_new_tensor

* update other gpt-2 examples to remove ggml_new_tensor calls in the graph

Files changed (1) hide show
  1. ggml-alloc.c +66 -36
ggml-alloc.c CHANGED
@@ -377,6 +377,9 @@ struct ggml_gallocr {
377
 
378
  struct node_alloc * node_allocs; // [n_nodes]
379
  int n_nodes;
 
 
 
380
  };
381
 
382
  ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs) {
@@ -427,6 +430,7 @@ void ggml_gallocr_free(ggml_gallocr_t galloc) {
427
  free(galloc->buffers);
428
  free(galloc->buf_tallocs);
429
  free(galloc->node_allocs);
 
430
  free(galloc);
431
  }
432
 
@@ -544,22 +548,8 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
544
  memset(galloc->hash_set.keys, 0, galloc->hash_set.size * sizeof(struct ggml_tensor *));
545
  memset(galloc->hash_values, 0, galloc->hash_set.size * sizeof(struct hash_node));
546
 
547
- // allocate all graph inputs first to avoid overwriting them
548
- for (int i = 0; i < graph->n_nodes; i++) {
549
- if (graph->nodes[i]->flags & GGML_TENSOR_FLAG_INPUT) {
550
- ggml_gallocr_allocate_node(galloc, graph->nodes[i], get_node_buffer_id(node_buffer_ids, i));
551
- }
552
- for (int j = 0; j < GGML_MAX_SRC; j++) {
553
- if (graph->nodes[i]->src[j] == NULL) {
554
- break;
555
- }
556
- if (graph->nodes[i]->src[j]->flags & GGML_TENSOR_FLAG_INPUT) {
557
- ggml_gallocr_allocate_node(galloc, graph->nodes[i]->src[j], get_node_buffer_id(node_buffer_ids, i));
558
- }
559
- }
560
- }
561
-
562
  // count number of children and views
 
563
  for (int i = 0; i < graph->n_nodes; i++) {
564
  struct ggml_tensor * node = graph->nodes[i];
565
 
@@ -568,14 +558,37 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
568
  ggml_gallocr_hash_get(galloc, view_src)->n_views += 1;
569
  }
570
 
 
 
 
 
571
  for (int j = 0; j < GGML_MAX_SRC; j++) {
572
- struct ggml_tensor * parent = node->src[j];
573
- if (parent == NULL) {
574
  break;
575
  }
576
- ggml_gallocr_hash_get(galloc, parent)->n_children += 1;
 
 
 
 
 
 
577
  }
578
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
579
 
580
  // allocate tensors
581
  for (int i = 0; i < graph->n_nodes; i++) {
@@ -696,6 +709,18 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
696
  }
697
  }
698
  }
 
 
 
 
 
 
 
 
 
 
 
 
699
 
700
  // reallocate buffers if needed
701
  for (int i = 0; i < galloc->n_buffers; i++) {
@@ -722,8 +747,8 @@ bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph *graph) {
722
  return ggml_gallocr_reserve_n(galloc, graph, NULL);
723
  }
724
 
725
- static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor * node, struct node_alloc * node_alloc, struct tensor_alloc * tensor_alloc) {
726
- assert(node->data || node->view_src || ggml_backend_buffer_get_alloc_size(galloc->buffers[node_alloc->buffer_id], node) <= tensor_alloc->size_max);
727
 
728
  if (node->view_src != NULL) {
729
  if (node->buffer == NULL) {
@@ -732,29 +757,20 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
732
  // this tensor was allocated without ggml-backend
733
  return;
734
  }
735
- ggml_backend_view_init(galloc->buffers[node_alloc->buffer_id], node);
736
  }
737
  } else {
738
  if (node->data == NULL) {
739
  assert(tensor_alloc->offset != SIZE_MAX);
740
- assert(ggml_backend_buffer_get_alloc_size(galloc->buffers[node_alloc->buffer_id], node) <= tensor_alloc->size_max);
741
- void * base = ggml_backend_buffer_get_base(galloc->buffers[node_alloc->buffer_id]);
742
  void * addr = (char *)base + tensor_alloc->offset;
743
- ggml_backend_tensor_alloc(galloc->buffers[node_alloc->buffer_id], node, addr);
744
  } else {
745
  if (node->buffer == NULL) {
746
  // this tensor was allocated without ggml-backend
747
  return;
748
  }
749
-
750
- #ifndef NDEBUG
751
- size_t offset =
752
- (char *)node->data -
753
- (char *)ggml_backend_buffer_get_base(node->buffer);
754
- size_t size = ggml_backend_buffer_get_alloc_size(node->buffer, node);
755
- assert(tensor_alloc->offset == SIZE_MAX || offset == tensor_alloc->offset);
756
- assert(tensor_alloc->offset == SIZE_MAX || size <= tensor_alloc->size_max);
757
- #endif
758
  }
759
  }
760
  }
@@ -773,6 +789,13 @@ static bool ggml_gallocr_needs_realloc(ggml_gallocr_t galloc, struct ggml_cgraph
773
  return true;
774
  }
775
 
 
 
 
 
 
 
 
776
  for (int i = 0; i < graph->n_nodes; i++) {
777
  struct ggml_tensor * node = graph->nodes[i];
778
  struct node_alloc * node_alloc = &galloc->node_allocs[i];
@@ -827,6 +850,7 @@ bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph)
827
  }
828
 
829
  // allocate the graph tensors from the previous assignments
 
830
  for (int i = 0; i < graph->n_nodes; i++) {
831
  struct ggml_tensor * node = graph->nodes[i];
832
  struct node_alloc * node_alloc = &galloc->node_allocs[i];
@@ -835,9 +859,15 @@ bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph)
835
  if (src == NULL) {
836
  break;
837
  }
838
- ggml_gallocr_init_tensor(galloc, src, node_alloc, &node_alloc->src[j]);
839
  }
840
- ggml_gallocr_init_tensor(galloc, node, node_alloc, &node_alloc->dst);
 
 
 
 
 
 
841
  }
842
 
843
  return true;
 
377
 
378
  struct node_alloc * node_allocs; // [n_nodes]
379
  int n_nodes;
380
+
381
+ struct tensor_alloc * leaf_allocs; // [n_leafs]
382
+ int n_leafs;
383
  };
384
 
385
  ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs) {
 
430
  free(galloc->buffers);
431
  free(galloc->buf_tallocs);
432
  free(galloc->node_allocs);
433
+ free(galloc->leaf_allocs);
434
  free(galloc);
435
  }
436
 
 
548
  memset(galloc->hash_set.keys, 0, galloc->hash_set.size * sizeof(struct ggml_tensor *));
549
  memset(galloc->hash_values, 0, galloc->hash_set.size * sizeof(struct hash_node));
550
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
551
  // count number of children and views
552
+ // allocate all graph inputs and leafs first to avoid overwriting them
553
  for (int i = 0; i < graph->n_nodes; i++) {
554
  struct ggml_tensor * node = graph->nodes[i];
555
 
 
558
  ggml_gallocr_hash_get(galloc, view_src)->n_views += 1;
559
  }
560
 
561
+ if (node->flags & GGML_TENSOR_FLAG_INPUT) {
562
+ ggml_gallocr_allocate_node(galloc, graph->nodes[i], get_node_buffer_id(node_buffer_ids, i));
563
+ }
564
+
565
  for (int j = 0; j < GGML_MAX_SRC; j++) {
566
+ struct ggml_tensor * src = node->src[j];
567
+ if (src == NULL) {
568
  break;
569
  }
570
+
571
+ ggml_gallocr_hash_get(galloc, src)->n_children += 1;
572
+
573
+ // allocate explicit inputs and leafs
574
+ if (src->flags & GGML_TENSOR_FLAG_INPUT || src->op == GGML_OP_NONE) {
575
+ ggml_gallocr_allocate_node(galloc, src, get_node_buffer_id(node_buffer_ids, i));
576
+ }
577
  }
578
+ }
579
+
580
+ // allocate the remaining leafs that are unused on the graph
581
+ // these are effectively static tensors that the application is not using in the graph, but may still want to allocate for other purposes
582
+ for (int i = 0; i < graph->n_leafs; i++) {
583
+ struct ggml_tensor * leaf = graph->leafs[i];
584
+ struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf);
585
+
586
+ if (hn->n_children == 0) {
587
+ assert(!hn->allocated);
588
+ // since buffer ids are only given for nodes, these leafs are always allocated in the first buffer
589
+ ggml_gallocr_allocate_node(galloc, leaf, 0);
590
+ }
591
+ }
592
 
593
  // allocate tensors
594
  for (int i = 0; i < graph->n_nodes; i++) {
 
709
  }
710
  }
711
  }
712
+ if (galloc->n_leafs < graph->n_leafs) {
713
+ free(galloc->leaf_allocs);
714
+ galloc->leaf_allocs = calloc(sizeof(struct tensor_alloc), graph->n_leafs);
715
+ GGML_ASSERT(galloc->leaf_allocs != NULL);
716
+ }
717
+ galloc->n_leafs = graph->n_leafs;
718
+ for (int i = 0; i < graph->n_leafs; i++) {
719
+ struct ggml_tensor * leaf = graph->leafs[i];
720
+ struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf);
721
+ galloc->leaf_allocs[i].offset = hn->offset;
722
+ galloc->leaf_allocs[i].size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], leaf);
723
+ }
724
 
725
  // reallocate buffers if needed
726
  for (int i = 0; i < galloc->n_buffers; i++) {
 
747
  return ggml_gallocr_reserve_n(galloc, graph, NULL);
748
  }
749
 
750
+ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor * node, int buffer_id, struct tensor_alloc * tensor_alloc) {
751
+ assert(node->data || node->view_src || ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], node) <= tensor_alloc->size_max);
752
 
753
  if (node->view_src != NULL) {
754
  if (node->buffer == NULL) {
 
757
  // this tensor was allocated without ggml-backend
758
  return;
759
  }
760
+ ggml_backend_view_init(galloc->buffers[buffer_id], node);
761
  }
762
  } else {
763
  if (node->data == NULL) {
764
  assert(tensor_alloc->offset != SIZE_MAX);
765
+ assert(ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], node) <= tensor_alloc->size_max);
766
+ void * base = ggml_backend_buffer_get_base(galloc->buffers[buffer_id]);
767
  void * addr = (char *)base + tensor_alloc->offset;
768
+ ggml_backend_tensor_alloc(galloc->buffers[buffer_id], node, addr);
769
  } else {
770
  if (node->buffer == NULL) {
771
  // this tensor was allocated without ggml-backend
772
  return;
773
  }
 
 
 
 
 
 
 
 
 
774
  }
775
  }
776
  }
 
789
  return true;
790
  }
791
 
792
+ if (galloc->n_leafs != graph->n_leafs) {
793
+ #ifndef NDEBUG
794
+ fprintf(stderr, "%s: graph has different number of leafs\n", __func__);
795
+ #endif
796
+ return true;
797
+ }
798
+
799
  for (int i = 0; i < graph->n_nodes; i++) {
800
  struct ggml_tensor * node = graph->nodes[i];
801
  struct node_alloc * node_alloc = &galloc->node_allocs[i];
 
850
  }
851
 
852
  // allocate the graph tensors from the previous assignments
853
+ // nodes
854
  for (int i = 0; i < graph->n_nodes; i++) {
855
  struct ggml_tensor * node = graph->nodes[i];
856
  struct node_alloc * node_alloc = &galloc->node_allocs[i];
 
859
  if (src == NULL) {
860
  break;
861
  }
862
+ ggml_gallocr_init_tensor(galloc, src, node_alloc->buffer_id, &node_alloc->src[j]);
863
  }
864
+ ggml_gallocr_init_tensor(galloc, node, node_alloc->buffer_id, &node_alloc->dst);
865
+ }
866
+ // leafs
867
+ for (int i = 0; i < graph->n_leafs; i++) {
868
+ struct ggml_tensor * leaf = graph->leafs[i];
869
+ struct tensor_alloc * leaf_alloc = &galloc->leaf_allocs[i];
870
+ ggml_gallocr_init_tensor(galloc, leaf, 0, leaf_alloc);
871
  }
872
 
873
  return true;