Spaces:
Sleeping
Sleeping
Neo Zhang Jianyu
commited on
add wait() to make code stable (llama/5895)
Browse files- ggml-sycl.cpp +46 -13
ggml-sycl.cpp
CHANGED
|
@@ -3769,8 +3769,42 @@ void log_ggml_var_device(const char*name, float *src, size_t total_elements, boo
|
|
| 3769 |
std::ofstream logfile;
|
| 3770 |
logfile.open(filename);
|
| 3771 |
for(size_t i=0; i<total_elements; i++){
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3772 |
if((i+1)%20 ==0) logfile <<std::endl;
|
| 3773 |
-
else logfile << local_buf[i] <<" ";
|
| 3774 |
}
|
| 3775 |
logfile <<std::endl;
|
| 3776 |
logfile.close();
|
|
@@ -14126,7 +14160,7 @@ inline void ggml_sycl_op_mul_mat_sycl(
|
|
| 14126 |
src1_ptr, dpct::library_data_t::real_half, ne10, &beta_f16,
|
| 14127 |
dst_f16.get(), dpct::library_data_t::real_half, ldc,
|
| 14128 |
dpct::library_data_t::real_half)));
|
| 14129 |
-
|
| 14130 |
const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(GGML_TYPE_F16);
|
| 14131 |
to_fp32_sycl(dst_f16.get(), dst_dd_i, row_diff*src1_ncols, stream);
|
| 14132 |
}
|
|
@@ -14159,6 +14193,7 @@ inline void ggml_sycl_op_mul_mat_sycl(
|
|
| 14159 |
dpct::get_value(&alpha, *g_sycl_handles[id]), src0_ddf_i, ne00,
|
| 14160 |
src1_ddf1_i, ne10, dpct::get_value(&beta, *g_sycl_handles[id]),
|
| 14161 |
dst_dd_i, ldc)));
|
|
|
|
| 14162 |
}
|
| 14163 |
(void) dst;
|
| 14164 |
(void) src1_ddq_i;
|
|
@@ -15295,8 +15330,8 @@ static void ggml_sycl_mul_mat_batched_sycl(const ggml_tensor *src0,
|
|
| 15295 |
sycl_pool_alloc<sycl::half> dst_f16;
|
| 15296 |
char * dst_t;
|
| 15297 |
|
| 15298 |
-
dpct::library_data_t cu_compute_type = dpct::library_data_t::
|
| 15299 |
-
dpct::library_data_t cu_data_type = dpct::library_data_t::
|
| 15300 |
|
| 15301 |
// dst strides
|
| 15302 |
size_t nbd2 = dst->nb[2];
|
|
@@ -15308,15 +15343,13 @@ static void ggml_sycl_mul_mat_batched_sycl(const ggml_tensor *src0,
|
|
| 15308 |
const float alpha_f32 = 1.0f;
|
| 15309 |
const float beta_f32 = 0.0f;
|
| 15310 |
|
| 15311 |
-
const void * alpha = &
|
| 15312 |
-
const void * beta = &
|
| 15313 |
|
| 15314 |
// TODO: Renable (dst->op_params[0] =! GGML_PREC_DEFAULT) pathway
|
| 15315 |
-
//
|
| 15316 |
-
dst_t = (char *) dst_f16.alloc(ne_dst);
|
| 15317 |
|
| 15318 |
-
|
| 15319 |
-
nbd3 /= sizeof(float) / sizeof(sycl::half);
|
| 15320 |
|
| 15321 |
GGML_ASSERT(ne12 % ne02 == 0);
|
| 15322 |
GGML_ASSERT(ne13 % ne03 == 0);
|
|
@@ -15356,6 +15389,7 @@ static void ggml_sycl_mul_mat_batched_sycl(const ggml_tensor *src0,
|
|
| 15356 |
nb11 / nb10, nb12 / nb10, beta,
|
| 15357 |
(char *)dst_t, cu_data_type, ne01, nb2 / nb0,
|
| 15358 |
ne12 * ne13, cu_compute_type)));
|
|
|
|
| 15359 |
} else {
|
| 15360 |
const int ne23 = ne12*ne13;
|
| 15361 |
|
|
@@ -15386,7 +15420,7 @@ static void ggml_sycl_mul_mat_batched_sycl(const ggml_tensor *src0,
|
|
| 15386 |
nb02, nb03, nb12_scaled, nb13_scaled,
|
| 15387 |
nbd2, nbd3, r2, r3, item_ct1);
|
| 15388 |
});
|
| 15389 |
-
});
|
| 15390 |
}
|
| 15391 |
SYCL_CHECK(CHECK_TRY_ERROR(dpct::gemm_batch(
|
| 15392 |
*g_sycl_handles[g_main_device], oneapi::mkl::transpose::trans,
|
|
@@ -15397,11 +15431,10 @@ static void ggml_sycl_mul_mat_batched_sycl(const ggml_tensor *src0,
|
|
| 15397 |
dpct::library_data_t::real_half, nb11 / nb10, beta,
|
| 15398 |
(void **)(ptrs_dst.get() + 0 * ne23), cu_data_type, ne01, ne23,
|
| 15399 |
cu_compute_type)));
|
|
|
|
| 15400 |
}
|
| 15401 |
#endif
|
| 15402 |
|
| 15403 |
-
const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(GGML_TYPE_F16);
|
| 15404 |
-
to_fp32_sycl(dst_f16.get(), dst_ddf, ne_dst, main_stream);
|
| 15405 |
}
|
| 15406 |
catch (sycl::exception const &exc) {
|
| 15407 |
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
|
|
|
|
| 3769 |
std::ofstream logfile;
|
| 3770 |
logfile.open(filename);
|
| 3771 |
for(size_t i=0; i<total_elements; i++){
|
| 3772 |
+
logfile << local_buf[i] <<" ";
|
| 3773 |
+
if((i+1)%20 ==0) logfile <<std::endl;
|
| 3774 |
+
}
|
| 3775 |
+
logfile <<std::endl;
|
| 3776 |
+
logfile.close();
|
| 3777 |
+
|
| 3778 |
+
if(src_on_device) ggml_sycl_host_free(local_buf);
|
| 3779 |
+
}
|
| 3780 |
+
|
| 3781 |
+
void log_ggml_var_device_fp16(const char*name, sycl::half *src, size_t total_elements, bool src_on_device){
|
| 3782 |
+
if(!g_ggml_sycl_debug) return;
|
| 3783 |
+
if(!src){
|
| 3784 |
+
printf("GGML Tensor:%s skip to save for NULL pointer\n", name);
|
| 3785 |
+
return;
|
| 3786 |
+
}
|
| 3787 |
+
char filename[1024];
|
| 3788 |
+
sprintf(filename, "%s.txt", name);
|
| 3789 |
+
printf("GGML Tensor:%s save to %s\n", name, filename);
|
| 3790 |
+
|
| 3791 |
+
size_t total_size = total_elements*sizeof(sycl::half);
|
| 3792 |
+
sycl::half *local_buf = NULL;
|
| 3793 |
+
if(src_on_device) {
|
| 3794 |
+
local_buf = (sycl::half *) ggml_sycl_host_malloc(total_size);
|
| 3795 |
+
ggml_sycl_set_device(g_main_device);
|
| 3796 |
+
dpct::queue_ptr main_stream = g_syclStreams[g_main_device][0];
|
| 3797 |
+
main_stream->memcpy(local_buf, src, total_size).wait();
|
| 3798 |
+
}
|
| 3799 |
+
else {
|
| 3800 |
+
local_buf = (sycl::half *)src;
|
| 3801 |
+
}
|
| 3802 |
+
|
| 3803 |
+
std::ofstream logfile;
|
| 3804 |
+
logfile.open(filename);
|
| 3805 |
+
for(size_t i=0; i<total_elements; i++){
|
| 3806 |
+
logfile << local_buf[i] <<" ";
|
| 3807 |
if((i+1)%20 ==0) logfile <<std::endl;
|
|
|
|
| 3808 |
}
|
| 3809 |
logfile <<std::endl;
|
| 3810 |
logfile.close();
|
|
|
|
| 14160 |
src1_ptr, dpct::library_data_t::real_half, ne10, &beta_f16,
|
| 14161 |
dst_f16.get(), dpct::library_data_t::real_half, ldc,
|
| 14162 |
dpct::library_data_t::real_half)));
|
| 14163 |
+
g_sycl_handles[id]->wait();
|
| 14164 |
const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(GGML_TYPE_F16);
|
| 14165 |
to_fp32_sycl(dst_f16.get(), dst_dd_i, row_diff*src1_ncols, stream);
|
| 14166 |
}
|
|
|
|
| 14193 |
dpct::get_value(&alpha, *g_sycl_handles[id]), src0_ddf_i, ne00,
|
| 14194 |
src1_ddf1_i, ne10, dpct::get_value(&beta, *g_sycl_handles[id]),
|
| 14195 |
dst_dd_i, ldc)));
|
| 14196 |
+
g_sycl_handles[id]->wait();
|
| 14197 |
}
|
| 14198 |
(void) dst;
|
| 14199 |
(void) src1_ddq_i;
|
|
|
|
| 15330 |
sycl_pool_alloc<sycl::half> dst_f16;
|
| 15331 |
char * dst_t;
|
| 15332 |
|
| 15333 |
+
dpct::library_data_t cu_compute_type = dpct::library_data_t::real_float;
|
| 15334 |
+
dpct::library_data_t cu_data_type = dpct::library_data_t::real_float;
|
| 15335 |
|
| 15336 |
// dst strides
|
| 15337 |
size_t nbd2 = dst->nb[2];
|
|
|
|
| 15343 |
const float alpha_f32 = 1.0f;
|
| 15344 |
const float beta_f32 = 0.0f;
|
| 15345 |
|
| 15346 |
+
const void * alpha = &alpha_f32;
|
| 15347 |
+
const void * beta = &beta_f32;
|
| 15348 |
|
| 15349 |
// TODO: Renable (dst->op_params[0] =! GGML_PREC_DEFAULT) pathway
|
| 15350 |
+
// oneMKL open source supports half, half, float, float: datatypes
|
|
|
|
| 15351 |
|
| 15352 |
+
dst_t = (char *) dst_ddf;
|
|
|
|
| 15353 |
|
| 15354 |
GGML_ASSERT(ne12 % ne02 == 0);
|
| 15355 |
GGML_ASSERT(ne13 % ne03 == 0);
|
|
|
|
| 15389 |
nb11 / nb10, nb12 / nb10, beta,
|
| 15390 |
(char *)dst_t, cu_data_type, ne01, nb2 / nb0,
|
| 15391 |
ne12 * ne13, cu_compute_type)));
|
| 15392 |
+
g_sycl_handles[g_main_device]->wait();
|
| 15393 |
} else {
|
| 15394 |
const int ne23 = ne12*ne13;
|
| 15395 |
|
|
|
|
| 15420 |
nb02, nb03, nb12_scaled, nb13_scaled,
|
| 15421 |
nbd2, nbd3, r2, r3, item_ct1);
|
| 15422 |
});
|
| 15423 |
+
}).wait();
|
| 15424 |
}
|
| 15425 |
SYCL_CHECK(CHECK_TRY_ERROR(dpct::gemm_batch(
|
| 15426 |
*g_sycl_handles[g_main_device], oneapi::mkl::transpose::trans,
|
|
|
|
| 15431 |
dpct::library_data_t::real_half, nb11 / nb10, beta,
|
| 15432 |
(void **)(ptrs_dst.get() + 0 * ne23), cu_data_type, ne01, ne23,
|
| 15433 |
cu_compute_type)));
|
| 15434 |
+
g_sycl_handles[g_main_device]->wait();
|
| 15435 |
}
|
| 15436 |
#endif
|
| 15437 |
|
|
|
|
|
|
|
| 15438 |
}
|
| 15439 |
catch (sycl::exception const &exc) {
|
| 15440 |
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
|