cxt123 Chenxiaotao03 committed on
Commit
dc8f956
·
unverified ·
1 Parent(s): 97ce95c

llava : MobileVLM support (llama/4954)

Browse files

* MobileVLM native implementation

* delete depthwise_conv_2d and permute_cpy related code, replacing both with existing functions; optimize the ldp definition and support the LLAMA_PERF option for CMake

* move android script to example/llava directory

* Fix the editor config checks

---------

Co-authored-by: Chenxiaotao03 <[email protected]>

Files changed (2) hide show
  1. ggml.c +139 -2
  2. ggml.h +24 -0
ggml.c CHANGED
@@ -1418,6 +1418,9 @@ inline static void ggml_vec_tanh_f32 (const int n, float * y, const float * x) {
1418
  inline static void ggml_vec_elu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expf(x[i])-1; }
1419
  inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; }
1420
  inline static void ggml_vec_leaky_relu_f32 (const int n, float * y, const float * x, const float ns) { for (int i = 0; i < n; ++i) y[i] = ((x[i] > 0.f) ? x[i] : 0.f) + ns * ((x[i] < 0.0f) ? x[i] : 0.f); }
 
 
 
1421
 
1422
  static const float GELU_COEF_A = 0.044715f;
1423
  static const float GELU_QUICK_COEF = -1.702f;
@@ -1776,9 +1779,11 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
1776
  "GELU",
1777
  "GELU_QUICK",
1778
  "SILU",
 
 
1779
  };
1780
 
1781
- static_assert(GGML_UNARY_OP_COUNT == 10, "GGML_UNARY_OP_COUNT != 10");
1782
 
1783
 
1784
  static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
@@ -3945,6 +3950,20 @@ struct ggml_tensor * ggml_silu_back(
3945
  return result;
3946
  }
3947
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3948
  // ggml_norm
3949
 
3950
  static struct ggml_tensor * ggml_norm_impl(
@@ -5344,6 +5363,33 @@ GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
5344
  return result;
5345
  }
5346
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5347
  // ggml_conv_2d
5348
 
5349
  // im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
@@ -9338,6 +9384,87 @@ static void ggml_compute_forward_silu_back(
9338
  }
9339
  }
9340
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9341
  // ggml_compute_forward_norm
9342
 
9343
  static void ggml_compute_forward_norm_f32(
@@ -12354,6 +12481,7 @@ static void ggml_compute_forward_im2col(
12354
  }
12355
  }
12356
 
 
12357
  // ggml_compute_forward_conv_transpose_2d
12358
 
12359
  static void ggml_compute_forward_conv_transpose_2d(
@@ -13922,6 +14050,14 @@ static void ggml_compute_forward_unary(
13922
  {
13923
  ggml_compute_forward_silu(params, src0, dst);
13924
  } break;
 
 
 
 
 
 
 
 
13925
  default:
13926
  {
13927
  GGML_ASSERT(false);
@@ -16335,6 +16471,8 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
16335
  case GGML_UNARY_OP_TANH:
16336
  case GGML_UNARY_OP_ELU:
16337
  case GGML_UNARY_OP_RELU:
 
 
16338
  {
16339
  n_tasks = 1;
16340
  } break;
@@ -16567,7 +16705,6 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
16567
  // distribute new work or execute it direct if 1T
16568
  while (++node_n < cgraph->n_nodes) {
16569
  GGML_PRINT_DEBUG_5("%s: %d/%d\n", __func__, node_n, cgraph->n_nodes);
16570
-
16571
  struct ggml_tensor * node = cgraph->nodes[node_n];
16572
  const int n_tasks = ggml_get_n_tasks(node, n_threads);
16573
 
 
1418
  inline static void ggml_vec_elu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expf(x[i])-1; }
1419
  inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; }
1420
  inline static void ggml_vec_leaky_relu_f32 (const int n, float * y, const float * x, const float ns) { for (int i = 0; i < n; ++i) y[i] = ((x[i] > 0.f) ? x[i] : 0.f) + ns * ((x[i] < 0.0f) ? x[i] : 0.f); }
1421
+ // TODO: optimize performance
1422
+ inline static void ggml_vec_hardswish_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i] * fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); }
1423
+ inline static void ggml_vec_hardsigmoid_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); }
1424
 
1425
  static const float GELU_COEF_A = 0.044715f;
1426
  static const float GELU_QUICK_COEF = -1.702f;
 
1779
  "GELU",
1780
  "GELU_QUICK",
1781
  "SILU",
1782
+ "HARDSWISH",
1783
+ "HARDSIGMOID",
1784
  };
1785
 
1786
+ static_assert(GGML_UNARY_OP_COUNT == 12, "GGML_UNARY_OP_COUNT != 12");
1787
 
1788
 
1789
  static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
 
3950
  return result;
3951
  }
3952
 
3953
+ // ggml hardswish
3954
+ struct ggml_tensor * ggml_hardswish(
3955
+ struct ggml_context * ctx,
3956
+ struct ggml_tensor * a) {
3957
+ return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSWISH);
3958
+ }
3959
+
3960
+ // ggml hardsigmoid
3961
+ struct ggml_tensor * ggml_hardsigmoid(
3962
+ struct ggml_context * ctx,
3963
+ struct ggml_tensor * a) {
3964
+ return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSIGMOID);
3965
+ }
3966
+
3967
  // ggml_norm
3968
 
3969
  static struct ggml_tensor * ggml_norm_impl(
 
5363
  return result;
5364
  }
5365
 
5366
+ // ggml_conv_depthwise
5367
+ struct ggml_tensor * ggml_conv_depthwise_2d(
5368
+ struct ggml_context * ctx,
5369
+ struct ggml_tensor * a,
5370
+ struct ggml_tensor * b,
5371
+ struct ggml_tensor * c,
5372
+ int s0,
5373
+ int s1,
5374
+ int p0,
5375
+ int p1,
5376
+ int d0,
5377
+ int d1) {
5378
+
5379
+ struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
5380
+ struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
5381
+ ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
5382
+ s0, s1, p0, p1, d0, d1, true); // [N * IC, OH, OW, KH * KW]
5383
+
5384
+ struct ggml_tensor * result =
5385
+ ggml_mul_mat(ctx,
5386
+ ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1), // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
5387
+ ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3])); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
5388
+
5389
+ result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
5390
+
5391
+ return result;
5392
+ }
5393
  // ggml_conv_2d
5394
 
5395
  // im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
 
9384
  }
9385
  }
9386
 
9387
+
9388
+ static void ggml_compute_forward_hardswish_f32(
9389
+ const struct ggml_compute_params * params,
9390
+ const struct ggml_tensor * src0,
9391
+ struct ggml_tensor * dst) {
9392
+ assert(params->ith == 0);
9393
+ assert(ggml_are_same_shape(src0, dst));
9394
+
9395
+ if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
9396
+ return;
9397
+ }
9398
+
9399
+ const int n = ggml_nrows(src0);
9400
+ const int nc = src0->ne[0];
9401
+
9402
+ assert(dst->nb[0] == sizeof(float));
9403
+ assert(src0->nb[0] == sizeof(float));
9404
+
9405
+ for (int i = 0; i < n; i++) {
9406
+ ggml_vec_hardswish_f32(nc,
9407
+ (float *) ((char *) dst->data + i*( dst->nb[1])),
9408
+ (float *) ((char *) src0->data + i*(src0->nb[1])));
9409
+ }
9410
+ }
9411
+ static void ggml_compute_forward_hardswish(
9412
+ const struct ggml_compute_params * params,
9413
+ const struct ggml_tensor * src0,
9414
+ struct ggml_tensor * dst) {
9415
+ switch (src0->type) {
9416
+ case GGML_TYPE_F32:
9417
+ {
9418
+ ggml_compute_forward_hardswish_f32(params, src0, dst);
9419
+ } break;
9420
+ default:
9421
+ {
9422
+ GGML_ASSERT(false);
9423
+ } break;
9424
+ }
9425
+ }
9426
+
9427
+ static void ggml_compute_forward_hardsigmoid_f32(
9428
+ const struct ggml_compute_params * params,
9429
+ const struct ggml_tensor * src0,
9430
+ struct ggml_tensor * dst) {
9431
+ assert(params->ith == 0);
9432
+ assert(ggml_are_same_shape(src0, dst));
9433
+
9434
+ if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
9435
+ return;
9436
+ }
9437
+
9438
+ const int n = ggml_nrows(src0);
9439
+ const int nc = src0->ne[0];
9440
+
9441
+ assert(dst->nb[0] == sizeof(float));
9442
+ assert(src0->nb[0] == sizeof(float));
9443
+
9444
+ for (int i = 0; i < n; i++) {
9445
+ ggml_vec_hardsigmoid_f32(nc,
9446
+ (float *) ((char *) dst->data + i*( dst->nb[1])),
9447
+ (float *) ((char *) src0->data + i*(src0->nb[1])));
9448
+ }
9449
+ }
9450
+
9451
+ static void ggml_compute_forward_hardsigmoid(
9452
+ const struct ggml_compute_params * params,
9453
+ const struct ggml_tensor * src0,
9454
+ struct ggml_tensor * dst) {
9455
+ switch (src0->type) {
9456
+ case GGML_TYPE_F32:
9457
+ {
9458
+ ggml_compute_forward_hardsigmoid_f32(params, src0, dst);
9459
+ } break;
9460
+ default:
9461
+ {
9462
+ GGML_ASSERT(false);
9463
+ } break;
9464
+ }
9465
+ }
9466
+
9467
+
9468
  // ggml_compute_forward_norm
9469
 
9470
  static void ggml_compute_forward_norm_f32(
 
12481
  }
12482
  }
12483
 
12484
+
12485
  // ggml_compute_forward_conv_transpose_2d
12486
 
12487
  static void ggml_compute_forward_conv_transpose_2d(
 
14050
  {
14051
  ggml_compute_forward_silu(params, src0, dst);
14052
  } break;
14053
+ case GGML_UNARY_OP_HARDSWISH:
14054
+ {
14055
+ ggml_compute_forward_hardswish(params, src0, dst);
14056
+ } break;
14057
+ case GGML_UNARY_OP_HARDSIGMOID:
14058
+ {
14059
+ ggml_compute_forward_hardsigmoid(params, src0, dst);
14060
+ } break;
14061
  default:
14062
  {
14063
  GGML_ASSERT(false);
 
16471
  case GGML_UNARY_OP_TANH:
16472
  case GGML_UNARY_OP_ELU:
16473
  case GGML_UNARY_OP_RELU:
16474
+ case GGML_UNARY_OP_HARDSWISH: // to opt for multiple threads
16475
+ case GGML_UNARY_OP_HARDSIGMOID: // to opt for multiple threads
16476
  {
16477
  n_tasks = 1;
16478
  } break;
 
16705
  // distribute new work or execute it direct if 1T
16706
  while (++node_n < cgraph->n_nodes) {
16707
  GGML_PRINT_DEBUG_5("%s: %d/%d\n", __func__, node_n, cgraph->n_nodes);
 
16708
  struct ggml_tensor * node = cgraph->nodes[node_n];
16709
  const int n_tasks = ggml_get_n_tasks(node, n_threads);
16710
 
ggml.h CHANGED
@@ -489,6 +489,8 @@ extern "C" {
489
  GGML_UNARY_OP_GELU,
490
  GGML_UNARY_OP_GELU_QUICK,
491
  GGML_UNARY_OP_SILU,
 
 
492
 
493
  GGML_UNARY_OP_COUNT,
494
  };
@@ -1032,6 +1034,16 @@ extern "C" {
1032
  struct ggml_tensor * a,
1033
  struct ggml_tensor * b);
1034
 
 
 
 
 
 
 
 
 
 
 
1035
  // normalize along rows
1036
  GGML_API struct ggml_tensor * ggml_norm(
1037
  struct ggml_context * ctx,
@@ -1483,6 +1495,18 @@ extern "C" {
1483
  int d1,
1484
  bool is_2D);
1485
 
 
 
 
 
 
 
 
 
 
 
 
 
1486
  GGML_API struct ggml_tensor * ggml_conv_1d(
1487
  struct ggml_context * ctx,
1488
  struct ggml_tensor * a,
 
489
  GGML_UNARY_OP_GELU,
490
  GGML_UNARY_OP_GELU_QUICK,
491
  GGML_UNARY_OP_SILU,
492
+ GGML_UNARY_OP_HARDSWISH,
493
+ GGML_UNARY_OP_HARDSIGMOID,
494
 
495
  GGML_UNARY_OP_COUNT,
496
  };
 
1034
  struct ggml_tensor * a,
1035
  struct ggml_tensor * b);
1036
 
1037
+ // hardswish(x) = x * relu6(x + 3) / 6
1038
+ GGML_API struct ggml_tensor * ggml_hardswish(
1039
+ struct ggml_context * ctx,
1040
+ struct ggml_tensor * a);
1041
+
1042
+ // hardsigmoid(x) = relu6(x + 3) / 6
1043
+ GGML_API struct ggml_tensor * ggml_hardsigmoid(
1044
+ struct ggml_context * ctx,
1045
+ struct ggml_tensor * a);
1046
+
1047
  // normalize along rows
1048
  GGML_API struct ggml_tensor * ggml_norm(
1049
  struct ggml_context * ctx,
 
1495
  int d1,
1496
  bool is_2D);
1497
 
1498
+ GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
1499
+ struct ggml_context * ctx,
1500
+ struct ggml_tensor * a,
1501
+ struct ggml_tensor * b,
1502
+ struct ggml_tensor * c,
1503
+ int s0,
1504
+ int s1,
1505
+ int p0,
1506
+ int p1,
1507
+ int d0,
1508
+ int d1);
1509
+
1510
  GGML_API struct ggml_tensor * ggml_conv_1d(
1511
  struct ggml_context * ctx,
1512
  struct ggml_tensor * a,