Spaces:
Sleeping
Sleeping
llava : MobileVLM support (llama/4954)
Browse files
* MobileVLM native implementation
* delete depthwise_conv_2d and permute_cpy related code, replace the two with existing functions, optimize the ldp definition, and support the LLAMA_PERF option for CMake
* move android script to example/llava directory
* Fix the editor config checks
---------
Co-authored-by: Chenxiaotao03 <[email protected]>
ggml.c
CHANGED
|
@@ -1418,6 +1418,9 @@ inline static void ggml_vec_tanh_f32 (const int n, float * y, const float * x) {
|
|
| 1418 |
inline static void ggml_vec_elu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expf(x[i])-1; }
|
| 1419 |
// Rectified linear unit: copies strictly positive inputs, writes 0 otherwise.
//   n - number of elements to process
//   y - destination array (n floats)
//   x - source array (n floats)
inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) {
    int idx = 0;
    while (idx < n) {
        const float v = x[idx];
        if (v > 0.f) {
            y[idx] = v;
        } else {
            y[idx] = 0.f;
        }
        ++idx;
    }
}
|
| 1420 |
// Leaky ReLU: positive inputs pass through, negative inputs are scaled by ns.
//   n  - number of elements to process
//   y  - destination array (n floats)
//   x  - source array (n floats)
//   ns - multiplier applied to the negative part of each input
inline static void ggml_vec_leaky_relu_f32 (const int n, float * y, const float * x, const float ns) {
    for (int k = 0; k < n; ++k) {
        const float v   = x[k];
        const float pos = (v > 0.f)  ? v : 0.f;  // positive part of the input
        const float neg = (v < 0.0f) ? v : 0.f;  // negative part of the input
        y[k] = pos + ns * neg;
    }
}
|
|
|
|
|
|
|
|
|
|
| 1421 |
|
| 1422 |
static const float GELU_COEF_A = 0.044715f;
|
| 1423 |
static const float GELU_QUICK_COEF = -1.702f;
|
|
@@ -1776,9 +1779,11 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
|
|
| 1776 |
"GELU",
|
| 1777 |
"GELU_QUICK",
|
| 1778 |
"SILU",
|
|
|
|
|
|
|
| 1779 |
};
|
| 1780 |
|
| 1781 |
-
static_assert(GGML_UNARY_OP_COUNT ==
|
| 1782 |
|
| 1783 |
|
| 1784 |
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
|
|
@@ -3945,6 +3950,20 @@ struct ggml_tensor * ggml_silu_back(
|
|
| 3945 |
return result;
|
| 3946 |
}
|
| 3947 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3948 |
// ggml_norm
|
| 3949 |
|
| 3950 |
static struct ggml_tensor * ggml_norm_impl(
|
|
@@ -5344,6 +5363,33 @@ GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
|
|
| 5344 |
return result;
|
| 5345 |
}
|
| 5346 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5347 |
// ggml_conv_2d
|
| 5348 |
|
| 5349 |
// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
|
|
@@ -9338,6 +9384,87 @@ static void ggml_compute_forward_silu_back(
|
|
| 9338 |
}
|
| 9339 |
}
|
| 9340 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9341 |
// ggml_compute_forward_norm
|
| 9342 |
|
| 9343 |
static void ggml_compute_forward_norm_f32(
|
|
@@ -12354,6 +12481,7 @@ static void ggml_compute_forward_im2col(
|
|
| 12354 |
}
|
| 12355 |
}
|
| 12356 |
|
|
|
|
| 12357 |
// ggml_compute_forward_conv_transpose_2d
|
| 12358 |
|
| 12359 |
static void ggml_compute_forward_conv_transpose_2d(
|
|
@@ -13922,6 +14050,14 @@ static void ggml_compute_forward_unary(
|
|
| 13922 |
{
|
| 13923 |
ggml_compute_forward_silu(params, src0, dst);
|
| 13924 |
} break;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13925 |
default:
|
| 13926 |
{
|
| 13927 |
GGML_ASSERT(false);
|
|
@@ -16335,6 +16471,8 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|
| 16335 |
case GGML_UNARY_OP_TANH:
|
| 16336 |
case GGML_UNARY_OP_ELU:
|
| 16337 |
case GGML_UNARY_OP_RELU:
|
|
|
|
|
|
|
| 16338 |
{
|
| 16339 |
n_tasks = 1;
|
| 16340 |
} break;
|
|
@@ -16567,7 +16705,6 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
| 16567 |
// distribute new work or execute it direct if 1T
|
| 16568 |
while (++node_n < cgraph->n_nodes) {
|
| 16569 |
GGML_PRINT_DEBUG_5("%s: %d/%d\n", __func__, node_n, cgraph->n_nodes);
|
| 16570 |
-
|
| 16571 |
struct ggml_tensor * node = cgraph->nodes[node_n];
|
| 16572 |
const int n_tasks = ggml_get_n_tasks(node, n_threads);
|
| 16573 |
|
|
|
|
| 1418 |
inline static void ggml_vec_elu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expf(x[i])-1; }
|
| 1419 |
// Rectified linear unit: copies strictly positive inputs, writes 0 otherwise.
//   n - number of elements to process
//   y - destination array (n floats)
//   x - source array (n floats)
inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) {
    int idx = 0;
    while (idx < n) {
        const float v = x[idx];
        if (v > 0.f) {
            y[idx] = v;
        } else {
            y[idx] = 0.f;
        }
        ++idx;
    }
}
|
| 1420 |
// Leaky ReLU: positive inputs pass through, negative inputs are scaled by ns.
//   n  - number of elements to process
//   y  - destination array (n floats)
//   x  - source array (n floats)
//   ns - multiplier applied to the negative part of each input
inline static void ggml_vec_leaky_relu_f32 (const int n, float * y, const float * x, const float ns) {
    for (int k = 0; k < n; ++k) {
        const float v   = x[k];
        const float pos = (v > 0.f)  ? v : 0.f;  // positive part of the input
        const float neg = (v < 0.0f) ? v : 0.f;  // negative part of the input
        y[k] = pos + ns * neg;
    }
}
|
| 1421 |
+
// TODO: optimize performance
|
| 1422 |
+
inline static void ggml_vec_hardswish_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i] * fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); }
|
| 1423 |
+
inline static void ggml_vec_hardsigmoid_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); }
|
| 1424 |
|
| 1425 |
static const float GELU_COEF_A = 0.044715f;
|
| 1426 |
static const float GELU_QUICK_COEF = -1.702f;
|
|
|
|
| 1779 |
"GELU",
|
| 1780 |
"GELU_QUICK",
|
| 1781 |
"SILU",
|
| 1782 |
+
"HARDSWISH",
|
| 1783 |
+
"HARDSIGMOID",
|
| 1784 |
};
|
| 1785 |
|
| 1786 |
+
static_assert(GGML_UNARY_OP_COUNT == 12, "GGML_UNARY_OP_COUNT != 12");
|
| 1787 |
|
| 1788 |
|
| 1789 |
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
|
|
|
|
| 3950 |
return result;
|
| 3951 |
}
|
| 3952 |
|
| 3953 |
+
// ggml hardswish
|
| 3954 |
+
struct ggml_tensor * ggml_hardswish(
|
| 3955 |
+
struct ggml_context * ctx,
|
| 3956 |
+
struct ggml_tensor * a) {
|
| 3957 |
+
return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSWISH);
|
| 3958 |
+
}
|
| 3959 |
+
|
| 3960 |
+
// ggml hardsigmoid
|
| 3961 |
+
struct ggml_tensor * ggml_hardsigmoid(
|
| 3962 |
+
struct ggml_context * ctx,
|
| 3963 |
+
struct ggml_tensor * a) {
|
| 3964 |
+
return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSIGMOID);
|
| 3965 |
+
}
|
| 3966 |
+
|
| 3967 |
// ggml_norm
|
| 3968 |
|
| 3969 |
static struct ggml_tensor * ggml_norm_impl(
|
|
|
|
| 5363 |
return result;
|
| 5364 |
}
|
| 5365 |
|
| 5366 |
+
// ggml_conv_depthwise
|
| 5367 |
+
struct ggml_tensor * ggml_conv_depthwise_2d(
|
| 5368 |
+
struct ggml_context * ctx,
|
| 5369 |
+
struct ggml_tensor * a,
|
| 5370 |
+
struct ggml_tensor * b,
|
| 5371 |
+
struct ggml_tensor * c,
|
| 5372 |
+
int s0,
|
| 5373 |
+
int s1,
|
| 5374 |
+
int p0,
|
| 5375 |
+
int p1,
|
| 5376 |
+
int d0,
|
| 5377 |
+
int d1) {
|
| 5378 |
+
|
| 5379 |
+
struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
|
| 5380 |
+
struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
|
| 5381 |
+
ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
|
| 5382 |
+
s0, s1, p0, p1, d0, d1, true); // [N * IC, OH, OW, KH * KW]
|
| 5383 |
+
|
| 5384 |
+
struct ggml_tensor * result =
|
| 5385 |
+
ggml_mul_mat(ctx,
|
| 5386 |
+
ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1), // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
|
| 5387 |
+
ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3])); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
|
| 5388 |
+
|
| 5389 |
+
result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
|
| 5390 |
+
|
| 5391 |
+
return result;
|
| 5392 |
+
}
|
| 5393 |
// ggml_conv_2d
|
| 5394 |
|
| 5395 |
// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
|
|
|
|
| 9384 |
}
|
| 9385 |
}
|
| 9386 |
|
| 9387 |
+
|
| 9388 |
+
static void ggml_compute_forward_hardswish_f32(
|
| 9389 |
+
const struct ggml_compute_params * params,
|
| 9390 |
+
const struct ggml_tensor * src0,
|
| 9391 |
+
struct ggml_tensor * dst) {
|
| 9392 |
+
assert(params->ith == 0);
|
| 9393 |
+
assert(ggml_are_same_shape(src0, dst));
|
| 9394 |
+
|
| 9395 |
+
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
| 9396 |
+
return;
|
| 9397 |
+
}
|
| 9398 |
+
|
| 9399 |
+
const int n = ggml_nrows(src0);
|
| 9400 |
+
const int nc = src0->ne[0];
|
| 9401 |
+
|
| 9402 |
+
assert(dst->nb[0] == sizeof(float));
|
| 9403 |
+
assert(src0->nb[0] == sizeof(float));
|
| 9404 |
+
|
| 9405 |
+
for (int i = 0; i < n; i++) {
|
| 9406 |
+
ggml_vec_hardswish_f32(nc,
|
| 9407 |
+
(float *) ((char *) dst->data + i*( dst->nb[1])),
|
| 9408 |
+
(float *) ((char *) src0->data + i*(src0->nb[1])));
|
| 9409 |
+
}
|
| 9410 |
+
}
|
| 9411 |
+
static void ggml_compute_forward_hardswish(
|
| 9412 |
+
const struct ggml_compute_params * params,
|
| 9413 |
+
const struct ggml_tensor * src0,
|
| 9414 |
+
struct ggml_tensor * dst) {
|
| 9415 |
+
switch (src0->type) {
|
| 9416 |
+
case GGML_TYPE_F32:
|
| 9417 |
+
{
|
| 9418 |
+
ggml_compute_forward_hardswish_f32(params, src0, dst);
|
| 9419 |
+
} break;
|
| 9420 |
+
default:
|
| 9421 |
+
{
|
| 9422 |
+
GGML_ASSERT(false);
|
| 9423 |
+
} break;
|
| 9424 |
+
}
|
| 9425 |
+
}
|
| 9426 |
+
|
| 9427 |
+
static void ggml_compute_forward_hardsigmoid_f32(
|
| 9428 |
+
const struct ggml_compute_params * params,
|
| 9429 |
+
const struct ggml_tensor * src0,
|
| 9430 |
+
struct ggml_tensor * dst) {
|
| 9431 |
+
assert(params->ith == 0);
|
| 9432 |
+
assert(ggml_are_same_shape(src0, dst));
|
| 9433 |
+
|
| 9434 |
+
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
| 9435 |
+
return;
|
| 9436 |
+
}
|
| 9437 |
+
|
| 9438 |
+
const int n = ggml_nrows(src0);
|
| 9439 |
+
const int nc = src0->ne[0];
|
| 9440 |
+
|
| 9441 |
+
assert(dst->nb[0] == sizeof(float));
|
| 9442 |
+
assert(src0->nb[0] == sizeof(float));
|
| 9443 |
+
|
| 9444 |
+
for (int i = 0; i < n; i++) {
|
| 9445 |
+
ggml_vec_hardsigmoid_f32(nc,
|
| 9446 |
+
(float *) ((char *) dst->data + i*( dst->nb[1])),
|
| 9447 |
+
(float *) ((char *) src0->data + i*(src0->nb[1])));
|
| 9448 |
+
}
|
| 9449 |
+
}
|
| 9450 |
+
|
| 9451 |
+
static void ggml_compute_forward_hardsigmoid(
|
| 9452 |
+
const struct ggml_compute_params * params,
|
| 9453 |
+
const struct ggml_tensor * src0,
|
| 9454 |
+
struct ggml_tensor * dst) {
|
| 9455 |
+
switch (src0->type) {
|
| 9456 |
+
case GGML_TYPE_F32:
|
| 9457 |
+
{
|
| 9458 |
+
ggml_compute_forward_hardsigmoid_f32(params, src0, dst);
|
| 9459 |
+
} break;
|
| 9460 |
+
default:
|
| 9461 |
+
{
|
| 9462 |
+
GGML_ASSERT(false);
|
| 9463 |
+
} break;
|
| 9464 |
+
}
|
| 9465 |
+
}
|
| 9466 |
+
|
| 9467 |
+
|
| 9468 |
// ggml_compute_forward_norm
|
| 9469 |
|
| 9470 |
static void ggml_compute_forward_norm_f32(
|
|
|
|
| 12481 |
}
|
| 12482 |
}
|
| 12483 |
|
| 12484 |
+
|
| 12485 |
// ggml_compute_forward_conv_transpose_2d
|
| 12486 |
|
| 12487 |
static void ggml_compute_forward_conv_transpose_2d(
|
|
|
|
| 14050 |
{
|
| 14051 |
ggml_compute_forward_silu(params, src0, dst);
|
| 14052 |
} break;
|
| 14053 |
+
case GGML_UNARY_OP_HARDSWISH:
|
| 14054 |
+
{
|
| 14055 |
+
ggml_compute_forward_hardswish(params, src0, dst);
|
| 14056 |
+
} break;
|
| 14057 |
+
case GGML_UNARY_OP_HARDSIGMOID:
|
| 14058 |
+
{
|
| 14059 |
+
ggml_compute_forward_hardsigmoid(params, src0, dst);
|
| 14060 |
+
} break;
|
| 14061 |
default:
|
| 14062 |
{
|
| 14063 |
GGML_ASSERT(false);
|
|
|
|
| 16471 |
case GGML_UNARY_OP_TANH:
|
| 16472 |
case GGML_UNARY_OP_ELU:
|
| 16473 |
case GGML_UNARY_OP_RELU:
|
| 16474 |
+
case GGML_UNARY_OP_HARDSWISH: // to opt for multiple threads
|
| 16475 |
+
case GGML_UNARY_OP_HARDSIGMOID: // to opt for multiple threads
|
| 16476 |
{
|
| 16477 |
n_tasks = 1;
|
| 16478 |
} break;
|
|
|
|
| 16705 |
// distribute new work or execute it direct if 1T
|
| 16706 |
while (++node_n < cgraph->n_nodes) {
|
| 16707 |
GGML_PRINT_DEBUG_5("%s: %d/%d\n", __func__, node_n, cgraph->n_nodes);
|
|
|
|
| 16708 |
struct ggml_tensor * node = cgraph->nodes[node_n];
|
| 16709 |
const int n_tasks = ggml_get_n_tasks(node, n_threads);
|
| 16710 |
|
ggml.h
CHANGED
|
@@ -489,6 +489,8 @@ extern "C" {
|
|
| 489 |
GGML_UNARY_OP_GELU,
|
| 490 |
GGML_UNARY_OP_GELU_QUICK,
|
| 491 |
GGML_UNARY_OP_SILU,
|
|
|
|
|
|
|
| 492 |
|
| 493 |
GGML_UNARY_OP_COUNT,
|
| 494 |
};
|
|
@@ -1032,6 +1034,16 @@ extern "C" {
|
|
| 1032 |
struct ggml_tensor * a,
|
| 1033 |
struct ggml_tensor * b);
|
| 1034 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1035 |
// normalize along rows
|
| 1036 |
GGML_API struct ggml_tensor * ggml_norm(
|
| 1037 |
struct ggml_context * ctx,
|
|
@@ -1483,6 +1495,18 @@ extern "C" {
|
|
| 1483 |
int d1,
|
| 1484 |
bool is_2D);
|
| 1485 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1486 |
GGML_API struct ggml_tensor * ggml_conv_1d(
|
| 1487 |
struct ggml_context * ctx,
|
| 1488 |
struct ggml_tensor * a,
|
|
|
|
| 489 |
GGML_UNARY_OP_GELU,
|
| 490 |
GGML_UNARY_OP_GELU_QUICK,
|
| 491 |
GGML_UNARY_OP_SILU,
|
| 492 |
+
GGML_UNARY_OP_HARDSWISH,
|
| 493 |
+
GGML_UNARY_OP_HARDSIGMOID,
|
| 494 |
|
| 495 |
GGML_UNARY_OP_COUNT,
|
| 496 |
};
|
|
|
|
| 1034 |
struct ggml_tensor * a,
|
| 1035 |
struct ggml_tensor * b);
|
| 1036 |
|
| 1037 |
+
// hardswish(x) = x * relu6(x + 3) / 6
|
| 1038 |
+
GGML_API struct ggml_tensor * ggml_hardswish(
|
| 1039 |
+
struct ggml_context * ctx,
|
| 1040 |
+
struct ggml_tensor * a);
|
| 1041 |
+
|
| 1042 |
+
// hardsigmoid(x) = relu6(x + 3) / 6
|
| 1043 |
+
GGML_API struct ggml_tensor * ggml_hardsigmoid(
|
| 1044 |
+
struct ggml_context * ctx,
|
| 1045 |
+
struct ggml_tensor * a);
|
| 1046 |
+
|
| 1047 |
// normalize along rows
|
| 1048 |
GGML_API struct ggml_tensor * ggml_norm(
|
| 1049 |
struct ggml_context * ctx,
|
|
|
|
| 1495 |
int d1,
|
| 1496 |
bool is_2D);
|
| 1497 |
|
| 1498 |
+
GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
|
| 1499 |
+
struct ggml_context * ctx,
|
| 1500 |
+
struct ggml_tensor * a,
|
| 1501 |
+
struct ggml_tensor * b,
|
| 1502 |
+
struct ggml_tensor * c,
|
| 1503 |
+
int s0,
|
| 1504 |
+
int s1,
|
| 1505 |
+
int p0,
|
| 1506 |
+
int p1,
|
| 1507 |
+
int d0,
|
| 1508 |
+
int d1);
|
| 1509 |
+
|
| 1510 |
GGML_API struct ggml_tensor * ggml_conv_1d(
|
| 1511 |
struct ggml_context * ctx,
|
| 1512 |
struct ggml_tensor * a,
|