Spaces:
Sleeping
Sleeping
Neo Zhang Jianyu
committed on
Commit
·
6d1ba81
1
Parent(s):
a06cbc7
fix mul_mat_id() for new input, make the ut pass (llama/6682)
Browse files
- ggml-sycl.cpp +50 -46
ggml-sycl.cpp
CHANGED
|
@@ -15996,73 +15996,76 @@ static void ggml_sycl_mul_mat_id_sycl(ggml_tensor * dst) {
|
|
| 15996 |
static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
|
| 15997 |
const ggml_tensor *src1,
|
| 15998 |
ggml_tensor *dst) try {
|
| 15999 |
-
|
| 16000 |
-
|
| 16001 |
-
|
| 16002 |
-
|
| 16003 |
|
| 16004 |
-
const
|
| 16005 |
-
const
|
| 16006 |
|
| 16007 |
-
const
|
| 16008 |
-
const int32_t
|
| 16009 |
-
const int32_t n_as = ((int32_t *) dst->op_params)[1];
|
| 16010 |
|
| 16011 |
std::vector<char> ids_host(ggml_nbytes(ids));
|
|
|
|
| 16012 |
|
| 16013 |
-
|
| 16014 |
-
|
| 16015 |
-
|
| 16016 |
-
const char * ids_dev = (const char *)((const ggml_tensor_extra_gpu *)ids->extra)->data_device[g_main_device];
|
| 16017 |
-
SYCL_CHECK(CHECK_TRY_ERROR(
|
| 16018 |
-
stream->memcpy(ids_host.data(), ids_dev, ggml_nbytes(ids)).wait()));
|
| 16019 |
-
// SYCL_CHECK(CHECK_TRY_ERROR(stream->wait()));
|
| 16020 |
-
} else {
|
| 16021 |
-
memcpy(ids_host.data(), ids->data, ggml_nbytes(ids));
|
| 16022 |
-
}
|
| 16023 |
|
| 16024 |
-
const ggml_tensor_extra_gpu *
|
| 16025 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16026 |
|
|
|
|
| 16027 |
ggml_tensor_extra_gpu src1_row_extra;
|
| 16028 |
ggml_tensor_extra_gpu dst_row_extra;
|
| 16029 |
|
|
|
|
| 16030 |
ggml_tensor src1_row = *src1;
|
| 16031 |
ggml_tensor dst_row = *dst;
|
| 16032 |
|
| 16033 |
src1_row.backend = GGML_BACKEND_TYPE_GPU;
|
| 16034 |
dst_row.backend = GGML_BACKEND_TYPE_GPU;
|
| 16035 |
|
|
|
|
| 16036 |
src1_row.extra = &src1_row_extra;
|
| 16037 |
dst_row.extra = &dst_row_extra;
|
| 16038 |
|
| 16039 |
-
char *
|
| 16040 |
-
|
| 16041 |
-
|
| 16042 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16043 |
|
| 16044 |
-
|
| 16045 |
-
|
| 16046 |
-
|
| 16047 |
|
|
|
|
| 16048 |
for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) {
|
| 16049 |
-
|
| 16050 |
-
|
| 16051 |
-
|
| 16052 |
-
|
| 16053 |
-
const int32_t row_id = *(const int32_t *) (ids_host.data() + i01*ids->nb[1] + id*ids->nb[0]);
|
| 16054 |
|
| 16055 |
GGML_ASSERT(row_id >= 0 && row_id < n_as);
|
| 16056 |
|
| 16057 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16058 |
|
| 16059 |
-
|
| 16060 |
-
src1_row.data = (char *) src1->data + i01*src1->nb[1]; // TODO why is this set?
|
| 16061 |
-
|
| 16062 |
-
dst_row_extra.data_device[g_main_device] = dst_original + i01*dst->nb[1];
|
| 16063 |
-
dst_row.data = (char *) dst->data + i01*dst->nb[1]; // TODO why is this set?
|
| 16064 |
-
|
| 16065 |
-
ggml_sycl_mul_mat(src0_row, &src1_row, &dst_row);
|
| 16066 |
}
|
| 16067 |
} else {
|
| 16068 |
sycl_pool_alloc<char> src1_contiguous(sizeof(float)*ggml_nelements(src1));
|
|
@@ -16072,8 +16075,6 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
|
|
| 16072 |
dst_row_extra.data_device[g_main_device] = dst_contiguous.get();
|
| 16073 |
|
| 16074 |
for (int32_t row_id = 0; row_id < n_as; ++row_id) {
|
| 16075 |
-
const struct ggml_tensor * src0_row = dst->src[row_id + 2];
|
| 16076 |
-
|
| 16077 |
int64_t num_src1_rows = 0;
|
| 16078 |
for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) {
|
| 16079 |
const int32_t row_id_i = *(const int32_t *) (ids_host.data() + i01*ids->nb[1] + id*ids->nb[0]);
|
|
@@ -16086,7 +16087,7 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
|
|
| 16086 |
|
| 16087 |
SYCL_CHECK(CHECK_TRY_ERROR(
|
| 16088 |
stream->memcpy(src1_contiguous.get() + num_src1_rows * nb11,
|
| 16089 |
-
src1_original + i01 * nb11, nb11)
|
| 16090 |
num_src1_rows++;
|
| 16091 |
}
|
| 16092 |
|
|
@@ -16094,6 +16095,9 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
|
|
| 16094 |
continue;
|
| 16095 |
}
|
| 16096 |
|
|
|
|
|
|
|
|
|
|
| 16097 |
src1_row.ne[1] = num_src1_rows;
|
| 16098 |
dst_row.ne[1] = num_src1_rows;
|
| 16099 |
|
|
@@ -16105,7 +16109,7 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
|
|
| 16105 |
dst_row.nb[2] = num_src1_rows*nb1;
|
| 16106 |
dst_row.nb[3] = num_src1_rows*nb1;
|
| 16107 |
|
| 16108 |
-
ggml_sycl_mul_mat(src0_row, &src1_row, &dst_row);
|
| 16109 |
|
| 16110 |
num_src1_rows = 0;
|
| 16111 |
for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) {
|
|
@@ -16119,7 +16123,7 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
|
|
| 16119 |
|
| 16120 |
SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(
|
| 16121 |
dst_original + i01 * nb1,
|
| 16122 |
-
dst_contiguous.get() + num_src1_rows * nb1, nb1)
|
| 16123 |
num_src1_rows++;
|
| 16124 |
}
|
| 16125 |
}
|
|
|
|
| 15996 |
static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
|
| 15997 |
const ggml_tensor *src1,
|
| 15998 |
ggml_tensor *dst) try {
|
| 15999 |
+
GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT &&
|
| 16000 |
+
"mul_mat_id does not support split buffers");
|
| 16001 |
+
const ggml_tensor *ids = dst->src[2];
|
| 16002 |
+
const dpct::queue_ptr stream = g_syclStreams[g_main_device][0];
|
| 16003 |
|
| 16004 |
+
const size_t nb11 = src1->nb[1];
|
| 16005 |
+
const size_t nb1 = dst->nb[1];
|
| 16006 |
|
| 16007 |
+
const int32_t id = ((int32_t *)dst->op_params)[0];
|
| 16008 |
+
const int32_t n_as = src0->ne[2];
|
|
|
|
| 16009 |
|
| 16010 |
std::vector<char> ids_host(ggml_nbytes(ids));
|
| 16011 |
+
const char *ids_dev = (const char *)ids->data;
|
| 16012 |
|
| 16013 |
+
SYCL_CHECK(CHECK_TRY_ERROR(
|
| 16014 |
+
stream->memcpy(ids_host.data(), ids_dev, ggml_nbytes(ids))));
|
| 16015 |
+
SYCL_CHECK(CHECK_TRY_ERROR(stream->wait()));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16016 |
|
| 16017 |
+
const ggml_tensor_extra_gpu *src0_extra =
|
| 16018 |
+
(const ggml_tensor_extra_gpu *)src0->extra;
|
| 16019 |
+
const ggml_tensor_extra_gpu *src1_extra =
|
| 16020 |
+
(const ggml_tensor_extra_gpu *)src1->extra;
|
| 16021 |
+
const ggml_tensor_extra_gpu *dst_extra =
|
| 16022 |
+
(const ggml_tensor_extra_gpu *)dst->extra;
|
| 16023 |
|
| 16024 |
+
ggml_tensor_extra_gpu src0_row_extra;
|
| 16025 |
ggml_tensor_extra_gpu src1_row_extra;
|
| 16026 |
ggml_tensor_extra_gpu dst_row_extra;
|
| 16027 |
|
| 16028 |
+
ggml_tensor src0_row = *src0;
|
| 16029 |
ggml_tensor src1_row = *src1;
|
| 16030 |
ggml_tensor dst_row = *dst;
|
| 16031 |
|
| 16032 |
src1_row.backend = GGML_BACKEND_TYPE_GPU;
|
| 16033 |
dst_row.backend = GGML_BACKEND_TYPE_GPU;
|
| 16034 |
|
| 16035 |
+
src0_row.extra = &src0_row_extra;
|
| 16036 |
src1_row.extra = &src1_row_extra;
|
| 16037 |
dst_row.extra = &dst_row_extra;
|
| 16038 |
|
| 16039 |
+
char *src0_original = src1->backend == GGML_BACKEND_TYPE_CPU
|
| 16040 |
+
? (char *)src0->data
|
| 16041 |
+
: (char *)src0_extra->data_device[g_main_device];
|
| 16042 |
+
char *src1_original = src1->backend == GGML_BACKEND_TYPE_CPU
|
| 16043 |
+
? (char *)src1->data
|
| 16044 |
+
: (char *)src1_extra->data_device[g_main_device];
|
| 16045 |
+
char *dst_original = dst->backend == GGML_BACKEND_TYPE_CPU
|
| 16046 |
+
? (char *)dst->data
|
| 16047 |
+
: (char *)dst_extra->data_device[g_main_device];
|
| 16048 |
|
| 16049 |
+
src0_row.ne[2] = 1;
|
| 16050 |
+
src0_row.ne[3] = 1;
|
| 16051 |
+
src0_row.nb[3] = src0->nb[2];
|
| 16052 |
|
| 16053 |
+
if (src1->ne[1] == 1) {
|
| 16054 |
for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) {
|
| 16055 |
+
const int32_t row_id =
|
| 16056 |
+
*(const int32_t *)(ids_host.data() + i01 * ids->nb[1] +
|
| 16057 |
+
id * ids->nb[0]);
|
|
|
|
|
|
|
| 16058 |
|
| 16059 |
GGML_ASSERT(row_id >= 0 && row_id < n_as);
|
| 16060 |
|
| 16061 |
+
src0_row_extra.data_device[g_main_device] =
|
| 16062 |
+
src0_original + row_id * src0->nb[2];
|
| 16063 |
+
src1_row_extra.data_device[g_main_device] =
|
| 16064 |
+
src1_original + i01 * src1->nb[1];
|
| 16065 |
+
dst_row_extra.data_device[g_main_device] =
|
| 16066 |
+
dst_original + i01 * dst->nb[1];
|
| 16067 |
|
| 16068 |
+
ggml_sycl_mul_mat(&src0_row, &src1_row, &dst_row);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16069 |
}
|
| 16070 |
} else {
|
| 16071 |
sycl_pool_alloc<char> src1_contiguous(sizeof(float)*ggml_nelements(src1));
|
|
|
|
| 16075 |
dst_row_extra.data_device[g_main_device] = dst_contiguous.get();
|
| 16076 |
|
| 16077 |
for (int32_t row_id = 0; row_id < n_as; ++row_id) {
|
|
|
|
|
|
|
| 16078 |
int64_t num_src1_rows = 0;
|
| 16079 |
for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) {
|
| 16080 |
const int32_t row_id_i = *(const int32_t *) (ids_host.data() + i01*ids->nb[1] + id*ids->nb[0]);
|
|
|
|
| 16087 |
|
| 16088 |
SYCL_CHECK(CHECK_TRY_ERROR(
|
| 16089 |
stream->memcpy(src1_contiguous.get() + num_src1_rows * nb11,
|
| 16090 |
+
src1_original + i01 * nb11, nb11)));
|
| 16091 |
num_src1_rows++;
|
| 16092 |
}
|
| 16093 |
|
|
|
|
| 16095 |
continue;
|
| 16096 |
}
|
| 16097 |
|
| 16098 |
+
src0_row_extra.data_device[g_main_device] =
|
| 16099 |
+
src0_original + row_id * src0->nb[2];
|
| 16100 |
+
|
| 16101 |
src1_row.ne[1] = num_src1_rows;
|
| 16102 |
dst_row.ne[1] = num_src1_rows;
|
| 16103 |
|
|
|
|
| 16109 |
dst_row.nb[2] = num_src1_rows*nb1;
|
| 16110 |
dst_row.nb[3] = num_src1_rows*nb1;
|
| 16111 |
|
| 16112 |
+
ggml_sycl_mul_mat(&src0_row, &src1_row, &dst_row);
|
| 16113 |
|
| 16114 |
num_src1_rows = 0;
|
| 16115 |
for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) {
|
|
|
|
| 16123 |
|
| 16124 |
SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(
|
| 16125 |
dst_original + i01 * nb1,
|
| 16126 |
+
dst_contiguous.get() + num_src1_rows * nb1, nb1)));
|
| 16127 |
num_src1_rows++;
|
| 16128 |
}
|
| 16129 |
}
|