ggml : sync latest ggml
ggml.c
CHANGED
@@ -2712,9 +2712,12 @@ static const char * GGML_OP_LABEL[GGML_OP_COUNT] = {
 
     "FLASH_ATTN",
     "FLASH_FF",
+
+    "MAP_UNARY",
+    "MAP_BINARY",
 };
 
-static_assert(GGML_OP_COUNT == 36, "GGML_OP_COUNT != 36");
+static_assert(GGML_OP_COUNT == 38, "GGML_OP_COUNT != 38");
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
@@ -2757,9 +2760,12 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
 
     "flash_attn(x)",
     "flash_ff(x)",
+
+    "f(x)",
+    "f(x,y)",
 };
 
-static_assert(GGML_OP_COUNT == 36, "GGML_OP_COUNT != 36");
+static_assert(GGML_OP_COUNT == 38, "GGML_OP_COUNT != 38");
 
 static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
 static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
@@ -3054,9 +3060,11 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
         return NULL;
     }
 
+    const size_t mem_size = (params.mem_size + GGML_MEM_ALIGN - 1) & ~(GGML_MEM_ALIGN - 1);
+
     *ctx = (struct ggml_context) {
-        /*.mem_size         =*/ params.mem_size,
-        /*.mem_buffer       =*/ params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(params.mem_size),
+        /*.mem_size         =*/ mem_size,
+        /*.mem_buffer       =*/ params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size),
         /*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
         /*.no_alloc         =*/ params.no_alloc,
         /*.n_objects        =*/ 0,
@@ -3066,7 +3074,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
         /*.scratch_save =*/ { 0, 0, NULL, },
     };
 
-    GGML_ASSERT(ctx->mem_buffer != NULL);
+    GGML_ASSERT(ctx->mem_buffer != NULL);
 
     ggml_assert_aligned(ctx->mem_buffer);
 
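Note: the new mem_size computation is the usual power-of-two round-up idiom, (x + A - 1) & ~(A - 1), so the arena size becomes a multiple of GGML_MEM_ALIGN even when the caller passes an odd byte count. A minimal standalone sketch of the idiom (the value 16 for GGML_MEM_ALIGN is assumed here for illustration; ggml.c defines the real one):

    #include <stdio.h>
    #include <stddef.h>

    #define GGML_MEM_ALIGN 16  // assumed for this demo; ggml.c defines its own

    int main(void) {
        // (x + A - 1) & ~(A - 1) rounds x up to the next multiple of A,
        // valid whenever A is a power of two.
        for (size_t x = 0; x <= 33; x += 11) {
            const size_t aligned = (x + GGML_MEM_ALIGN - 1) & ~((size_t)GGML_MEM_ALIGN - 1);
            printf("%3zu -> %3zu\n", x, aligned);  // 0->0, 11->16, 22->32, 33->48
        }
        return 0;
    }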
@@ -4905,6 +4913,90 @@ struct ggml_tensor * ggml_flash_ff(
     return result;
 }
 
+// ggml_map_unary
+
+struct ggml_tensor * ggml_map_unary_impl_f32(
+        struct ggml_context        * ctx,
+        struct ggml_tensor         * a,
+        const  ggml_unary_op_f32_t fun,
+        bool   inplace) {
+    bool is_node = false;
+
+    if (!inplace && a->grad) {
+        is_node = true;
+    }
+
+    struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
+    *((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
+    struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+    result->op = GGML_OP_MAP_UNARY;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src0 = a;
+    result->opt[0] = addr_tensor;
+
+    return result;
+}
+
+struct ggml_tensor * ggml_map_unary_f32(
+        struct ggml_context        * ctx,
+        struct ggml_tensor         * a,
+        const  ggml_unary_op_f32_t fun) {
+    return ggml_map_unary_impl_f32(ctx, a, fun, false);
+}
+
+struct ggml_tensor * ggml_map_unary_inplace_f32(
+        struct ggml_context        * ctx,
+        struct ggml_tensor         * a,
+        const  ggml_unary_op_f32_t fun) {
+    return ggml_map_unary_impl_f32(ctx, a, fun, true);
+}
+
+// ggml_map_binary
+
+struct ggml_tensor * ggml_map_binary_impl_f32(
+        struct ggml_context         * ctx,
+        struct ggml_tensor          * a,
+        struct ggml_tensor          * b,
+        const  ggml_binary_op_f32_t fun,
+        bool   inplace) {
+    GGML_ASSERT(ggml_are_same_shape(a, b));
+
+    bool is_node = false;
+
+    if (!inplace && (a->grad || b->grad)) {
+        is_node = true;
+    }
+
+    struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
+    *((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
+    struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+    result->op = GGML_OP_MAP_BINARY;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src0 = a;
+    result->src1 = b;
+    result->opt[0] = addr_tensor;
+
+    return result;
+}
+
+struct ggml_tensor * ggml_map_binary_f32(
+        struct ggml_context         * ctx,
+        struct ggml_tensor          * a,
+        struct ggml_tensor          * b,
+        const  ggml_binary_op_f32_t fun) {
+    return ggml_map_binary_impl_f32(ctx, a, b, fun, false);
+}
+
+struct ggml_tensor * ggml_map_binary_inplace_f32(
+        struct ggml_context         * ctx,
+        struct ggml_tensor          * a,
+        struct ggml_tensor          * b,
+        const  ggml_binary_op_f32_t fun) {
+    return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 
 void ggml_set_param(
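Note: since graph nodes can only reference other tensors, the constructors above smuggle the user callback through a one-off GGML_TYPE_I32 tensor sized to hold a single function pointer (sizeof(void *) / sizeof(int32_t) elements); ggml_compute_forward later reads it back with the mirror-image cast. A minimal sketch of that store/load round trip outside of ggml (demo_op is a made-up kernel):

    #include <assert.h>
    #include <stdint.h>

    typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);

    // Made-up kernel standing in for a user callback.
    static void demo_op(const int n, float * dst, const float * src) {
        for (int i = 0; i < n; i++) dst[i] = 2.0f*src[i];
    }

    int main(void) {
        // Stand-in for addr_tensor->data: enough int32_t slots for one pointer.
        _Alignas(void *) int32_t storage[sizeof(void *) / sizeof(int32_t)];

        // Store, with the same cast ggml_map_unary_impl_f32 performs.
        *((void (**)(void))(void *)storage) = (void (*)(void))demo_op;

        // Load, with the cast ggml_compute_forward performs for GGML_OP_MAP_UNARY.
        const ggml_unary_op_f32_t fun = *((ggml_unary_op_f32_t *)(void *)storage);

        float src[2] = {1.0f, 2.0f}, dst[2];
        fun(2, dst, src);
        assert(dst[0] == 2.0f && dst[1] == 4.0f);
        return 0;
    }

Converting between function and object pointers is technically implementation-defined in C, but it works on the platforms ggml targets, which is presumably why the diff takes the same shortcut.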
@@ -7507,6 +7599,8 @@ static void ggml_compute_forward_rope_f32(
     // row index used to determine which thread to use
     int ir = 0;
 
+    const float theta_scale = powf(10000.0, -2.0f/n_dims);
+
     for (int64_t i3 = 0; i3 < ne3; i3++) {
         for (int64_t i2 = (mode == 0 ? 0 : n_past); i2 < ne2; i2++) {
             const int p = (mode == 0 ? n_past + i2 : i2);
@@ -7514,11 +7608,13 @@ static void ggml_compute_forward_rope_f32(
             if (ir++ < ir0) continue;
             if (ir > ir1) break;
 
+            float theta = (float)p;
+
             for (int i0 = 0; i0 < n_dims; i0 += 2) {
-                const float theta = powf(10000.0, ((float)-i0)/n_dims);
+                const float cos_theta = cosf(theta);
+                const float sin_theta = sinf(theta);
 
-                const float cos_theta = cosf(p*theta);
-                const float sin_theta = sinf(p*theta);
+                theta *= theta_scale;
 
                 const float * const src = (float *)((char *) src0->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
                 float * dst_data = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
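Note: the two rope hunks above (and the matching f16 hunks below) replace one powf call per element pair with a single powf per row. Since theta(i0) = p * 10000^(-i0/n_dims) and i0 advances by 2, each iteration can just multiply the running theta by theta_scale = 10000^(-2/n_dims). A small standalone check of that recurrence (n_dims and p are arbitrary demo values):

    #include <math.h>
    #include <stdio.h>

    int main(void) {
        const int n_dims = 64;  // demo value; in ggml this is the rotated dimension count
        const int p      = 7;   // demo value; in ggml this is the token position

        const float theta_scale = powf(10000.0f, -2.0f/n_dims);

        float theta = (float)p;
        for (int i0 = 0; i0 < n_dims; i0 += 2) {
            // reference: what the old per-element code computed
            const float reference = p*powf(10000.0f, ((float)-i0)/n_dims);
            printf("i0=%2d incremental=%.6f reference=%.6f\n", i0, theta, reference);
            theta *= theta_scale;  // the recurrence used by the new code
        }
        return 0;
    }

The two columns agree up to float rounding; the incremental form accumulates a little error across the row, which is harmless at these magnitudes.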
@@ -7580,6 +7676,8 @@ static void ggml_compute_forward_rope_f16(
     // row index used to determine which thread to use
     int ir = 0;
 
+    const float theta_scale = powf(10000.0, -2.0f/n_dims);
+
     for (int64_t i3 = 0; i3 < ne3; i3++) {
         for (int64_t i2 = (mode == 0 ? 0 : n_past); i2 < ne2; i2++) {
             const int p = (mode == 0 ? n_past + i2 : i2);
@@ -7587,11 +7685,13 @@ static void ggml_compute_forward_rope_f16(
             if (ir++ < ir0) continue;
             if (ir > ir1) break;
 
+            float theta = (float)p;
+
             for (int i0 = 0; i0 < n_dims; i0 += 2) {
-                const float theta = powf(10000.0, ((float)-i0)/n_dims);
+                const float cos_theta = cosf(theta);
+                const float sin_theta = sinf(theta);
 
-                const float cos_theta = cosf(p*theta);
-                const float sin_theta = sinf(p*theta);
+                theta *= theta_scale;
 
                 const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
                 ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
@@ -8865,6 +8965,111 @@ static void ggml_compute_forward_flash_ff(
     }
 }
 
+// ggml_compute_forward_map_unary
+
+static void ggml_compute_forward_map_unary_f32(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        struct ggml_tensor * dst,
+        const ggml_unary_op_f32_t fun) {
+    GGML_ASSERT(ggml_are_same_shape(src0, dst));
+
+    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+        return;
+    }
+
+    const int n  = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    assert( dst->nb[0] == sizeof(float));
+    assert(src0->nb[0] == sizeof(float));
+
+    for (int i = 0; i < n; i++) {
+        fun(nc,
+                (float *) ((char *) dst->data  + i*( dst->nb[1])),
+                (float *) ((char *) src0->data + i*(src0->nb[1])));
+    }
+}
+
+
+static void ggml_compute_forward_map_unary(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        struct ggml_tensor * dst,
+        const ggml_unary_op_f32_t fun) {
+    switch (src0->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_map_unary_f32(params, src0, dst, fun);
+            } break;
+        case GGML_TYPE_Q4_0:
+        case GGML_TYPE_Q4_1:
+        case GGML_TYPE_I8:
+        case GGML_TYPE_I16:
+        case GGML_TYPE_I32:
+        case GGML_TYPE_F16:
+        case GGML_TYPE_COUNT:
+            {
+                GGML_ASSERT(false);
+            } break;
+    }
+}
+
+// ggml_compute_forward_map_binary
+
+static void ggml_compute_forward_map_binary_f32(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        const struct ggml_tensor * src1,
+        struct ggml_tensor * dst,
+        const ggml_binary_op_f32_t fun) {
+    assert(params->ith == 0);
+    assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
+
+    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+        return;
+    }
+
+    const int n  = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    assert( dst->nb[0] == sizeof(float));
+    assert(src0->nb[0] == sizeof(float));
+    assert(src1->nb[0] == sizeof(float));
+
+    for (int i = 0; i < n; i++) {
+        fun(nc,
+                (float *) ((char *) dst->data  + i*( dst->nb[1])),
+                (float *) ((char *) src0->data + i*(src0->nb[1])),
+                (float *) ((char *) src1->data + i*(src1->nb[1])));
+    }
+}
+
+
+static void ggml_compute_forward_map_binary(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        const struct ggml_tensor * src1,
+        struct ggml_tensor * dst,
+        const ggml_binary_op_f32_t fun) {
+    switch (src0->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_map_binary_f32(params, src0, src1, dst, fun);
+            } break;
+        case GGML_TYPE_Q4_0:
+        case GGML_TYPE_Q4_1:
+        case GGML_TYPE_I8:
+        case GGML_TYPE_I16:
+        case GGML_TYPE_I32:
+        case GGML_TYPE_F16:
+        case GGML_TYPE_COUNT:
+            {
+                GGML_ASSERT(false);
+            } break;
+    }
+}
+
 /////////////////////////////////
 
 static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
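Note: as the loops above show, the stored callback is invoked once per row with nc = ne[0] contiguous floats, the row base pointers advanced by the nb[1] strides; a conforming kernel never sees tensor metadata. Hypothetical examples matching the two typedefs:

    // Made-up kernels matching ggml_unary_op_f32_t / ggml_binary_op_f32_t;
    // each call receives one row of n contiguous floats.
    static void scale_by_two(const int n, float * dst, const float * src) {
        for (int i = 0; i < n; i++) {
            dst[i] = 2.0f*src[i];
        }
    }

    static void elementwise_max(const int n, float * dst, const float * a, const float * b) {
        for (int i = 0; i < n; i++) {
            dst[i] = a[i] > b[i] ? a[i] : b[i];
        }
    }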
@@ -9014,6 +9219,18 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
             {
                 ggml_compute_forward_flash_ff(params, tensor->src0, tensor->src1, tensor->opt[0], tensor->opt[1], tensor->opt[2], tensor);
             } break;
+        case GGML_OP_MAP_UNARY:
+            {
+                const ggml_unary_op_f32_t fun = *((ggml_unary_op_f32_t *)tensor->opt[0]->data);
+                ggml_compute_forward_map_unary(params, tensor->src0, tensor, fun);
+            }
+            break;
+        case GGML_OP_MAP_BINARY:
+            {
+                const ggml_binary_op_f32_t fun = *((ggml_binary_op_f32_t *)tensor->opt[0]->data);
+                ggml_compute_forward_map_binary(params, tensor->src0, tensor->src1, tensor, fun);
+            }
+            break;
         case GGML_OP_NONE:
             {
                 // nop
@@ -9273,6 +9490,11 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, bool inplace) {
             {
                 GGML_ASSERT(false); // not supported
             } break;
+        case GGML_OP_MAP_UNARY:
+        case GGML_OP_MAP_BINARY:
+            {
+                GGML_ASSERT(false); // not supported
+            } break;
         case GGML_OP_NONE:
             {
                 // nop
@@ -9765,6 +9987,11 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
 
                         work_size = MAX(work_size, cur);
                     } break;
+                case GGML_OP_MAP_UNARY:
+                case GGML_OP_MAP_BINARY:
+                    {
+                        node->n_tasks = 1;
+                    } break;
                 case GGML_OP_NONE:
                     {
                         node->n_tasks = 1;
ggml.h
CHANGED
@@ -253,6 +253,9 @@ enum ggml_op {
     GGML_OP_FLASH_ATTN,
     GGML_OP_FLASH_FF,
 
+    GGML_OP_MAP_UNARY,
+    GGML_OP_MAP_BINARY,
+
     GGML_OP_COUNT,
 };
 
@@ -652,6 +655,21 @@ struct ggml_tensor * ggml_flash_ff(
         struct ggml_tensor  * c0,
         struct ggml_tensor  * c1);
 
+// Mapping operations
+typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
+typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
+
+struct ggml_tensor * ggml_map_unary_f32(
+        struct ggml_context        * ctx,
+        struct ggml_tensor         * a,
+        const  ggml_unary_op_f32_t fun);
+
+struct ggml_tensor * ggml_map_binary_f32(
+        struct ggml_context         * ctx,
+        struct ggml_tensor          * a,
+        struct ggml_tensor          * b,
+        const  ggml_binary_op_f32_t fun);
+
 //
 // automatic differentiation
 //
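Note: putting the new API together, a sketch of end-to-end usage (the square_f32 kernel and all sizes are made up; error handling omitted). It relies only on calls visible in this header or in the diff above, plus the long-standing ggml_new_tensor_1d / ggml_set_f32_1d / ggml_get_f32_1d / ggml_build_forward helpers:

    #include <stdio.h>
    #include "ggml.h"

    // Made-up user kernel; receives rows of contiguous floats.
    static void square_f32(const int n, float * dst, const float * src) {
        for (int i = 0; i < n; i++) {
            dst[i] = src[i]*src[i];
        }
    }

    int main(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16*1024*1024,  // arbitrary 16 MB arena
            /*.mem_buffer =*/ NULL,          // let ggml allocate it
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(params);

        struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
        for (int i = 0; i < 4; i++) {
            ggml_set_f32_1d(a, i, (float)(i + 1));
        }

        // Wrap the custom kernel in a GGML_OP_MAP_UNARY node and evaluate it.
        struct ggml_tensor * y = ggml_map_unary_f32(ctx, a, square_f32);

        struct ggml_cgraph gf = ggml_build_forward(y);
        ggml_graph_compute(ctx, &gf);

        for (int i = 0; i < 4; i++) {
            printf("%.1f\n", ggml_get_f32_1d(y, i));  // 1.0 4.0 9.0 16.0
        }

        ggml_free(ctx);
        return 0;
    }

As the ggml.c hunks show, the backward pass asserts for these ops and n_tasks is pinned to 1, so mapped ops are forward-only and single-threaded.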
|