ggml : add unified SYCL backend for Intel GPUs (llama/2690)
* first update for migration
* update init_cublas
* add debug function, commit all helper code
* step 1
* step 2
* step 3: add fp16, slower 31->28
* add GGML_LIST_DEVICE function
* step 5 format device and print
* step 6: enhance error check, remove CUDA macro, enhance device id to fix non-zero id issue
* support non-zero main device
* step 7: add debug for code path, rm log
* step 8: rename all macros & funcs from cuda to sycl
* fix error when selecting non-zero device, format device list
* ren ggml-sycl.hpp -> ggml-sycl.h
* clear CMAKE to rm unused lib and options
* correct queue: rm dpct:get_queue
* add print tensor function to debug
* fix error: wrong result in 658746bb26702e50f2c59c0e4ada8e9da6010481
* consolidate dpct definitions in one header file to replace folder dpct
* refactor device log
* mv dpct definition from folder dpct to ggml-sycl.h
* update readme, refactor build script
* fix build with sycl
* set nthread=1 for sycl, increasing performance
* add run script, comment debug code
* add ls-sycl-device tool
* add ls-sycl-device, rm unused files
* rm trailing space
* dos2unix
* Update README_sycl.md
* fix return type
* remove sycl version from include path
* restore rm code to fix hang issue
* add syc and link for sycl readme
* rm original sycl code before refactor
* fix code err
* add known issue for pvc hang issue
* enable SYCL_F16 support
* align pr4766
* check for sycl blas, better performance
* cleanup 1
* remove extra endif
* add build&run script, clean CMakefile, update guide by review comments
* rename macro to intel hardware
* editor config format
* format fixes
* format fixes
* editor format fix
* Remove unused headers
* skip building sycl tool for other code paths
* replace tab by space
* fix blas matmul function
* fix mac build
* restore hip dependency
* fix conflict
* rename per review comments
* mv internal function to .cpp file
* export function print_sycl_devices(), mv class dpct definition to source file
* update CI/action for sycl code, fix CI error of repeat/dup
* fix action ID format issue
* rm unused strategy
* enable llama_f16 in ci
* fix conflict
* fix build break on macOS, since macOS CI depends on external ggml instead of internal ggml
* fix ci cases for unsupported data type
* revert unrelated changes in cuda cmake
remove useless nommq
fix typo of GGML_USE_CLBLAS_SYCL
* revert hip cmake changes
* fix indent
* add prefix in func name
* revert no mmq
* rm cpu blas duplicate
* fix no_new_line
* fix src1->type==F16 bug.
* pass batch offset for F16 src1
* fix batch error
* fix wrong code
* revert sycl checking in test-sampling
* pass void as arguments of ggml_backend_sycl_print_sycl_devices
* remove extra blank line in test-sampling
* revert setting n_threads in sycl
* implement std::isinf for icpx with fast math (a sketch of the workaround follows the commit trailers below)
* Update ci/run.sh
Co-authored-by: Georgi Gerganov <[email protected]>
* Update examples/sycl/run-llama2.sh
Co-authored-by: Georgi Gerganov <[email protected]>
* Update examples/sycl/run-llama2.sh
Co-authored-by: Georgi Gerganov <[email protected]>
* Update CMakeLists.txt
Co-authored-by: Georgi Gerganov <[email protected]>
* Update CMakeLists.txt
Co-authored-by: Georgi Gerganov <[email protected]>
* Update CMakeLists.txt
Co-authored-by: Georgi Gerganov <[email protected]>
* Update CMakeLists.txt
Co-authored-by: Georgi Gerganov <[email protected]>
* add copyright and MIT license declare
* update the cmd example
---------
Co-authored-by: jianyuzh <[email protected]>
Co-authored-by: luoyu-intel <[email protected]>
Co-authored-by: Meng, Hengyu <[email protected]>
Co-authored-by: Georgi Gerganov <[email protected]>
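
The std::isinf item above refers to icpx's fast-math mode, where the compiler may assume no infinities occur and fold std::isinf() to a constant. A minimal sketch of the usual bit-level workaround; the PR's actual implementation may differ, and isinf_bits is an illustrative name:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

// Bit-level infinity test: mask off the sign bit, then compare against the
// IEEE-754 single-precision +inf pattern (exponent all ones, mantissa zero).
// Fast math cannot fold this away, since it never inspects the float as a float.
static int isinf_bits(float x) {
    uint32_t bits;
    memcpy(&bits, &x, sizeof(bits)); // type-pun safely via memcpy
    return (bits & 0x7FFFFFFFu) == 0x7F800000u;
}

int main(void) {
    float inf = 1e30f * 1e30f; // overflows to +inf
    printf("%d %d\n", isinf_bits(inf), isinf_bits(1.0f)); // prints: 1 0
    return 0;
}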
- ggml-backend.c +5 -0
- ggml.c +20 -2
- ggml.h +1 -0
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -339,6 +339,11 @@ GGML_CALL static void ggml_backend_registry_init(void) {
     ggml_backend_cuda_reg_devices();
 #endif
 
+#ifdef GGML_USE_SYCL
+    extern void ggml_backend_sycl_reg_devices(void);
+    ggml_backend_sycl_reg_devices();
+#endif
+
 #ifdef GGML_USE_METAL
     extern GGML_CALL ggml_backend_t ggml_backend_reg_metal_init(const char * params, void * user_data);
     extern GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
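With the hunk above in place, SYCL devices become visible through the generic backend registry, next to CUDA and Metal. A minimal sketch of enumerating them, assuming the registry helpers of this era (the first registry call performs the lazy ggml_backend_registry_init() shown above):

#include <stdio.h>
#include "ggml-backend.h"

int main(void) {
    // the first registry call triggers ggml_backend_registry_init(), which
    // runs ggml_backend_sycl_reg_devices() when GGML_USE_SYCL is defined
    size_t n = ggml_backend_reg_get_count();
    for (size_t i = 0; i < n; i++) {
        printf("backend %zu: %s\n", i, ggml_backend_reg_get_name(i));
    }
    return 0;
}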
--- a/ggml.c
+++ b/ggml.c
@@ -248,6 +248,8 @@ inline static void * ggml_aligned_malloc(size_t size) {
 #include "ggml-cuda.h"
 #elif defined(GGML_USE_CLBLAST)
 #include "ggml-opencl.h"
+#elif defined(GGML_USE_SYCL)
+#include "ggml-sycl.h"
 #endif
 
 // floating point type used to accumulate sums
@@ -2293,6 +2295,8 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
     ggml_init_cublas();
 #elif defined(GGML_USE_CLBLAST)
     ggml_cl_init();
+#elif defined(GGML_USE_SYCL)
+    ggml_init_sycl();
 #endif
 
     ggml_setup_op_has_task_pass();
@@ -14701,6 +14705,12 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
     GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_CPU);
 #endif // GGML_USE_CUBLAS
 
+#ifdef GGML_USE_SYCL
+    bool skip_cpu = ggml_sycl_compute_forward(params, tensor);
+    if (skip_cpu) {
+        return;
+    }
+#endif // GGML_USE_SYCL
     switch (tensor->op) {
         case GGML_OP_DUP:
             {
@@ -20280,7 +20290,7 @@ int ggml_cpu_has_wasm_simd(void) {
 }
 
 int ggml_cpu_has_blas(void) {
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST)
+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_SYCL)
     return 1;
 #else
     return 0;
@@ -20303,8 +20313,16 @@ int ggml_cpu_has_clblast(void) {
 #endif
 }
 
+int ggml_cpu_has_sycl(void) {
+#if defined(GGML_USE_SYCL)
+    return 1;
+#else
+    return 0;
+#endif
+}
+
 int ggml_cpu_has_gpublas(void) {
-    return ggml_cpu_has_cublas() || ggml_cpu_has_clblast();
+    return ggml_cpu_has_cublas() || ggml_cpu_has_clblast() || ggml_cpu_has_sycl();
 }
 
 int ggml_cpu_has_sse3(void) {
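Note that callers do not change: the #elif chain keeps the accelerated backends mutually exclusive at compile time, and the new skip_cpu hook gives the SYCL backend first refusal on each op before the CPU switch runs. A minimal sketch of unchanged caller code, assuming a build with GGML_USE_SYCL defined:

#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        /* .mem_size   = */ 16 * 1024 * 1024, // 16 MiB context buffer
        /* .mem_buffer = */ NULL,             // let ggml allocate it
        /* .no_alloc   = */ false,
    };
    // ggml_init_sycl() runs inside this call when GGML_USE_SYCL is defined
    struct ggml_context * ctx = ggml_init(params);
    if (ctx == NULL) {
        return 1;
    }
    // ... build and compute graphs as usual; ggml_compute_forward() offloads
    // ops that ggml_sycl_compute_forward() accepts and falls back to CPU ...
    ggml_free(ctx);
    return 0;
}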
--- a/ggml.h
+++ b/ggml.h
@@ -2266,6 +2266,7 @@ extern "C" {
     GGML_API int ggml_cpu_has_gpublas (void);
     GGML_API int ggml_cpu_has_sse3    (void);
     GGML_API int ggml_cpu_has_ssse3   (void);
+    GGML_API int ggml_cpu_has_sycl    (void);
     GGML_API int ggml_cpu_has_vsx     (void);
 
     //
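A short usage sketch for the new capability flag: ggml_cpu_has_sycl() reports compile-time SYCL support, and because it now feeds ggml_cpu_has_gpublas(), existing "is a GPU BLAS available?" checks pick up SYCL without modification:

#include <stdio.h>
#include "ggml.h"

int main(void) {
    printf("sycl    : %d\n", ggml_cpu_has_sycl());
    printf("cublas  : %d\n", ggml_cpu_has_cublas());
    printf("clblast : %d\n", ggml_cpu_has_clblast());
    // gpublas is true when any of cublas, clblast, or sycl is compiled in
    printf("gpublas : %d\n", ggml_cpu_has_gpublas());
    return 0;
}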