Akarshan Biswas committed
Commit 0a9c73a · 1 Parent(s): c699617

SYCL: Remove misleading ggml_sycl_op_flatten function (llama/12387)

* SYCL: Remove misleading ggml_sycl_op_flatten function

* remove trailing whitespace

* Fix L2 norm from rebase

* remove try catch block from element_wise.cpp

* remove comment from common.hpp

* ggml-sycl.cpp: Add try catch sycl::exception block in compute_forward

* norm.cpp: remove try catch exception block
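
In short: each SYCL op now receives only the backend context and the destination tensor and looks up its stream, device and data pointers itself, instead of being routed through ggml_sycl_op_flatten with pre-cast float pointers. A condensed before/after sketch of the pattern, using silu as a representative op (signature and body as they appear in the element_wise.cpp hunk below; the other ops follow the same shape):

    // Before: dispatched through ggml_sycl_op_flatten, which pre-cast every tensor to float *
    // (old signature only, definition omitted here).
    inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
                                  ggml_tensor *dst, const float *src0_dd,
                                  const float *src1_dd, float *dst_dd,
                                  const queue_ptr &main_stream);

    // After: the op pulls everything it needs from ctx and dst.
    inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
        GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
        GGML_ASSERT(dst->type == GGML_TYPE_F32);
        dpct::queue_ptr main_stream = ctx.stream();
        SYCL_CHECK(ggml_sycl_set_device(ctx.device));
        const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
        float * dst_dd = static_cast<float *>(dst->data);
        silu_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
    }

The thin wrappers (ggml_sycl_silu, ggml_sycl_add, ...) now call these ops directly, and the single try/catch for sycl::exception moves up into compute_forward in ggml-sycl.cpp.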

ggml/src/ggml-sycl/common.cpp CHANGED
@@ -66,41 +66,6 @@ int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block
     return sycl_down_blk_size;
 }
 
-void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                          const ggml_tensor *src1, ggml_tensor *dst,
-                          const ggml_sycl_op_flatten_t op) try {
-
-    const bool use_src1 = src1 != nullptr;
-    if(use_src1)
-        GGML_ASSERT(strcmp(src1->buffer->buft->iface.get_name(src1->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
-
-    // dd = data device
-    float * src0_ddf = (float *) src0->data;
-    float * src1_ddf = use_src1 ? (float *) src1->data : nullptr;
-    float * dst_ddf = (float *) dst->data;
-
-    ggml_sycl_pool_alloc<float> src0_f(ctx.pool());
-    ggml_sycl_pool_alloc<float> src1_f(ctx.pool());
-    ggml_sycl_pool_alloc<float> dst_f(ctx.pool());
-
-    ggml_sycl_set_device(ctx.device);
-    queue_ptr main_stream = ctx.stream();
-    // GGML_SYCL_DEBUG("ctx.device=%d, main_stream=%p src0_on_device=%d, src1_on_device=%d, dst_on_device=%d\n",
-    //                 ctx.device, main_stream, src0_on_device, src1_on_device, dst_on_device);
-
-    // do the computation
-    op(ctx, src0, src1, dst, src0_ddf, src1_ddf, dst_ddf, main_stream);
-    // print_ggml_tensor("tensor", dst);
-}
-catch (sycl::exception const &exc) {
-
-    std::cerr << exc.what() << "Exception caught at file:" << __FILE__
-              << ", line:" << __LINE__ << std::endl;
-    std::exit(1);
-}
-
-
 void release_extra_gpu(ggml_tensor_extra_gpu * extra, std::vector<queue_ptr> streams) {
     for (int i = 0; i < ggml_sycl_info().device_count; ++i) {
         for (int64_t is = 0; is < GGML_SYCL_MAX_STREAMS; ++is) {
ggml/src/ggml-sycl/common.hpp CHANGED
@@ -494,12 +494,6 @@ static __dpct_inline__ Tp* get_pointer(sycl::local_accessor<Tp, dim> acc) {
 
 int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block_size);
 
-typedef void (*ggml_sycl_op_flatten_t)(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                                       const ggml_tensor *src1,
-                                       ggml_tensor *dst, const float *src0_dd,
-                                       const float *src1_dd, float *dst_dd,
-                                       const queue_ptr &main_stream);
-
 template<float (*bin_op)(const float, const float), typename src0_t, typename src1_t, typename dst_t>
 static void k_bin_bcast(const src0_t * src0, const src1_t * src1, dst_t * dst,
                         int ne0, int ne1, int ne2, int ne3,
@@ -757,24 +751,22 @@ struct bin_bcast_sycl {
 
 template <class op>
 inline void ggml_sycl_op_bin_bcast(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                                   const ggml_tensor *src1, ggml_tensor *dst,
-                                   const float *src0_dd, const float *src1_dd,
-                                   float *dst_dd,
-                                   const queue_ptr &main_stream) {
+                                   const ggml_tensor *src1, ggml_tensor *dst) {
+    dpct::queue_ptr main_stream = ctx.stream();
 
     if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
-        op()(ctx, src0, src1, dst, src0_dd, src1_dd, dst_dd, main_stream);
+        op()(ctx, src0, src1, dst, (const float *)src0->data, (const float *)src1->data, (float *)dst->data, main_stream);
     } else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) {
-        op()(ctx, src0, src1, dst, (const sycl::half *)src0_dd, src1_dd,
-             (sycl::half *)dst_dd, main_stream);
+        op()(ctx, src0, src1, dst, (const sycl::half *)src0->data, (const float *)src1->data,
+             (sycl::half *)dst->data, main_stream);
     } else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F32) {
-        op()(ctx, src0, src1, dst, (const sycl::half *)src0_dd, src1_dd, dst_dd,
+        op()(ctx, src0, src1, dst, (const sycl::half *)src0->data, (const float *)src1->data, (float *)dst->data,
             main_stream);
     } else if (src0->type == GGML_TYPE_I32 && dst->type == GGML_TYPE_I32) {
-        op()(ctx, src0, src1, dst, (const int32_t *)src0_dd, (const int32_t *)src1_dd, (int32_t *)dst_dd,
+        op()(ctx, src0, src1, dst, (const int32_t *)src0->data, (const int32_t *)src1->data, (int32_t *)dst->data,
            main_stream);
     } else if (src0->type == GGML_TYPE_I16 && dst->type == GGML_TYPE_I16) {
-        op()(ctx, src0, src1, dst, (const int16_t *)src0_dd, (const int16_t *)src1_dd, (int16_t *)dst_dd,
+        op()(ctx, src0, src1, dst, (const int16_t *)src0->data, (const int16_t *)src1->data, (int16_t *)dst->data,
            main_stream);
     } else {
        fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s, src1: %s\n", __func__,
@@ -784,8 +776,4 @@ inline void ggml_sycl_op_bin_bcast(ggml_backend_sycl_context & ctx, const ggml_t
 }
 
 bool gpu_has_xmx(sycl::device &dev);
-
-void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                          const ggml_tensor *src1, ggml_tensor *dst,
-                          const ggml_sycl_op_flatten_t op);
 #endif // GGML_SYCL_COMMON_HPP
ggml/src/ggml-sycl/element_wise.cpp CHANGED
@@ -509,497 +509,409 @@ static void pad_f32_sycl(const float *x, float *dst, const int ne00,
509
  });
510
  }
511
 
512
- inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
513
- ggml_tensor *dst, const float *src0_dd,
514
- const float *src1_dd, float *dst_dd,
515
- const queue_ptr &main_stream) {
516
 
517
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
518
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
 
 
 
519
 
520
- silu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
521
 
522
- GGML_UNUSED(src1);
523
- GGML_UNUSED(dst);
524
- GGML_UNUSED(src1_dd);
525
- GGML_UNUSED(ctx);
526
  }
527
 
528
- inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
529
- ggml_tensor *dst, const float *src0_dd,
530
- const float *src1_dd, float *dst_dd,
531
- const queue_ptr &main_stream) {
532
 
533
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
534
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
 
 
 
535
 
536
- gelu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
537
-
538
- GGML_UNUSED(src1);
539
- GGML_UNUSED(dst);
540
- GGML_UNUSED(src1_dd);
541
- GGML_UNUSED(ctx);
542
  }
543
- inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
544
- const ggml_tensor *src1, ggml_tensor *dst,
545
- const float *src0_dd, const float *src1_dd,
546
- float *dst_dd,
547
- const queue_ptr &main_stream) {
548
 
549
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
550
- GGML_ASSERT( dst->type == GGML_TYPE_F32);
551
 
552
- gelu_quick_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
 
 
 
 
 
553
 
554
- GGML_UNUSED(src1);
555
- GGML_UNUSED(dst);
556
- GGML_UNUSED(src1_dd);
557
- GGML_UNUSED(ctx);
558
  }
559
 
560
- inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
561
- ggml_tensor *dst, const float *src0_dd,
562
- const float *src1_dd, float *dst_dd,
563
- const queue_ptr &main_stream) {
564
 
565
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
566
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
567
- tanh_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
568
-
569
- GGML_UNUSED(src1);
570
- GGML_UNUSED(dst);
571
- GGML_UNUSED(src1_dd);
572
- GGML_UNUSED(ctx);
573
  }
574
 
575
- inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
576
- ggml_tensor *dst, const float *src0_dd,
577
- const float *src1_dd, float *dst_dd,
578
- const queue_ptr &main_stream) {
579
 
580
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
581
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
 
 
 
582
 
583
- relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
584
-
585
- GGML_UNUSED(src1);
586
- GGML_UNUSED(dst);
587
- GGML_UNUSED(src1_dd);
588
- GGML_UNUSED(ctx);
589
  }
590
 
591
- inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
592
- const ggml_tensor *src1, ggml_tensor *dst,
593
- const float *src0_dd, const float *src1_dd,
594
- float *dst_dd,
595
- const queue_ptr &main_stream) {
596
 
597
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
598
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
 
 
 
599
 
600
- hardsigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
601
-
602
- GGML_UNUSED(src1);
603
- GGML_UNUSED(dst);
604
- GGML_UNUSED(src1_dd);
605
- GGML_UNUSED(ctx);
606
  }
607
 
608
- inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
609
- const ggml_tensor *src1, ggml_tensor *dst,
610
- const float *src0_dd, const float *src1_dd,
611
- float *dst_dd, const queue_ptr &main_stream) {
612
 
613
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
614
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
 
 
 
615
 
616
- hardswish_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
617
-
618
- GGML_UNUSED(src1);
619
- GGML_UNUSED(dst);
620
- GGML_UNUSED(src1_dd);
621
- GGML_UNUSED(ctx);
622
  }
623
 
624
- inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
625
- const ggml_tensor *src1, ggml_tensor *dst,
626
- const float *src0_dd, const float *src1_dd,
627
- float *dst_dd, const queue_ptr &main_stream) {
628
 
629
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
630
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
 
 
 
631
 
632
- exp_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
633
-
634
- GGML_UNUSED(src1);
635
- GGML_UNUSED(dst);
636
- GGML_UNUSED(src1_dd);
637
- GGML_UNUSED(ctx);
638
  }
639
 
640
- inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
641
- const ggml_tensor *src1, ggml_tensor *dst,
642
- const float *src0_dd, const float *src1_dd,
643
- float *dst_dd, const queue_ptr &main_stream) {
644
 
645
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
646
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
 
 
 
647
 
648
- log_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
649
-
650
- GGML_UNUSED(src1);
651
- GGML_UNUSED(dst);
652
- GGML_UNUSED(src1_dd);
653
- GGML_UNUSED(ctx);
654
  }
655
 
656
- inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
657
- const ggml_tensor *src1, ggml_tensor *dst,
658
- const float *src0_dd, const float *src1_dd,
659
- float *dst_dd, const queue_ptr &main_stream) {
660
 
661
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
662
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
 
 
 
663
 
664
- sigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
665
 
666
- GGML_UNUSED(src1);
667
- GGML_UNUSED(dst);
668
- GGML_UNUSED(src1_dd);
669
- GGML_UNUSED(ctx);
670
  }
671
 
672
- inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
673
- const ggml_tensor *src1, ggml_tensor *dst,
674
- const float *src0_dd, const float *src1_dd,
675
- float *dst_dd, const queue_ptr &main_stream) {
676
 
677
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
678
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
 
 
 
679
 
680
- sqrt_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
681
 
682
- GGML_UNUSED(src1);
683
- GGML_UNUSED(dst);
684
- GGML_UNUSED(src1_dd);
685
- GGML_UNUSED(ctx);
686
  }
687
 
688
- inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
689
- const ggml_tensor *src1, ggml_tensor *dst,
690
- const float *src0_dd, const float *src1_dd,
691
- float *dst_dd, const queue_ptr &main_stream) {
692
 
693
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
694
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
 
 
 
695
 
696
- sin_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
697
-
698
- GGML_UNUSED(src1);
699
- GGML_UNUSED(dst);
700
- GGML_UNUSED(src1_dd);
701
- GGML_UNUSED(ctx);
702
  }
703
 
704
- inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
705
- const ggml_tensor *src1, ggml_tensor *dst,
706
- const float *src0_dd, const float *src1_dd,
707
- float *dst_dd, const queue_ptr &main_stream) {
708
 
709
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
710
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
 
 
 
711
 
712
- cos_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
713
-
714
- GGML_UNUSED(src1);
715
- GGML_UNUSED(dst);
716
- GGML_UNUSED(src1_dd);
717
- GGML_UNUSED(ctx);
718
  }
719
 
720
- inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
721
- const ggml_tensor *src1, ggml_tensor *dst,
722
- const float *src0_dd, const float *src1_dd,
723
- float *dst_dd, const queue_ptr &main_stream) {
724
 
725
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
726
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
 
 
 
727
 
728
- step_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
729
-
730
- GGML_UNUSED(src1);
731
- GGML_UNUSED(dst);
732
- GGML_UNUSED(src1_dd);
733
- GGML_UNUSED(ctx);
734
  }
735
 
736
- inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
737
- const ggml_tensor *src1, ggml_tensor *dst,
738
- const float *src0_dd, const float *src1_dd,
739
- float *dst_dd, const queue_ptr &main_stream) {
740
 
741
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
742
- GGML_ASSERT( dst->type == GGML_TYPE_F32);
743
 
744
- neg_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
 
 
 
 
 
745
 
746
- GGML_UNUSED(src1);
747
- GGML_UNUSED(dst);
748
- GGML_UNUSED(src1_dd);
749
- GGML_UNUSED(ctx);
750
  }
751
 
752
- inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
753
- const ggml_tensor *src1, ggml_tensor *dst,
754
- const float *src0_dd, const float *src1_dd,
755
- float *dst_dd,
756
- const queue_ptr &main_stream) {
757
 
758
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
759
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
 
 
 
760
 
761
  float negative_slope;
762
  memcpy(&negative_slope, dst->op_params, sizeof(float));
763
 
764
- leaky_relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), negative_slope, main_stream);
765
-
766
- GGML_UNUSED(src1);
767
- GGML_UNUSED(dst);
768
- GGML_UNUSED(src1_dd);
769
- GGML_UNUSED(ctx);
770
  }
771
 
772
- inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
773
- ggml_tensor *dst, const float *src0_dd,
774
- const float *src1_dd, float *dst_dd,
775
- const queue_ptr &main_stream) {
776
 
777
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
778
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
 
 
 
779
 
780
- sqr_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
781
-
782
- GGML_UNUSED(src1);
783
- GGML_UNUSED(dst);
784
- GGML_UNUSED(src1_dd);
785
- GGML_UNUSED(ctx);
786
  }
787
 
788
- inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
789
- const ggml_tensor *src1, ggml_tensor *dst,
790
- const float *src0_dd, const float *src1_dd,
791
- float *dst_dd,
792
- const queue_ptr &main_stream) {
793
 
794
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
795
  GGML_ASSERT(dst->type == GGML_TYPE_F32);
 
 
 
 
796
 
797
- const float sf0 = (float)dst->ne[0]/src0->ne[0];
798
- const float sf1 = (float)dst->ne[1]/src0->ne[1];
799
- const float sf2 = (float)dst->ne[2]/src0->ne[2];
800
- const float sf3 = (float)dst->ne[3]/src0->ne[3];
801
 
802
- upscale_f32_sycl(src0_dd, dst_dd, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3],
803
  dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3,
804
  main_stream);
805
-
806
- GGML_UNUSED(src1);
807
- GGML_UNUSED(dst);
808
- GGML_UNUSED(src1_dd);
809
- GGML_UNUSED(ctx);
810
  }
811
 
812
- inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
813
- ggml_tensor *dst, const float *src0_dd,
814
- const float *src1_dd, float *dst_dd,
815
- const queue_ptr &main_stream) {
816
 
817
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
818
  GGML_ASSERT(dst->type == GGML_TYPE_F32);
819
- GGML_ASSERT(src0->ne[3] == 1 && dst->ne[3] == 1); // just 3D tensors
 
 
 
 
820
 
821
  pad_f32_sycl(src0_dd, dst_dd,
822
- src0->ne[0], src0->ne[1], src0->ne[2],
823
  dst->ne[0], dst->ne[1], dst->ne[2], main_stream);
824
-
825
- GGML_UNUSED(src1);
826
- GGML_UNUSED(dst);
827
- GGML_UNUSED(src1_dd);
828
- GGML_UNUSED(ctx);
829
  }
830
 
831
- inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
832
- ggml_tensor *dst, const float *src0_dd,
833
- const float *src1_dd, float *dst_dd,
834
- const queue_ptr &main_stream) {
835
 
836
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
837
- GGML_ASSERT(src1->type == GGML_TYPE_F32);
838
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
839
  GGML_ASSERT(dst->ne[3] == 1); // just 3D tensors supported
 
 
 
 
 
840
 
841
  int nb1 = dst->op_params[0] / 4; // 4 bytes of float32
842
  int nb2 = dst->op_params[1] / 4; // 4 bytes of float32
843
  // int nb3 = dst->op_params[2] / 4; // 4 bytes of float32 - unused
844
  int offset = dst->op_params[3] / 4; // offset in bytes
845
 
846
- acc_f32_sycl(src0_dd, src1_dd, dst_dd, ggml_nelements(dst), src1->ne[0], src1->ne[1], src1->ne[2], nb1, nb2, offset, main_stream);
847
-
848
- GGML_UNUSED(dst);
849
- GGML_UNUSED(ctx);
850
  }
851
 
852
- inline void ggml_sycl_op_add(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
853
- ggml_tensor *dst, const float *src0_dd,
854
- const float *src1_dd, float *dst_dd,
855
- const queue_ptr &main_stream) {
856
 
857
- ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_add>>(ctx, src0, src1, dst, src0_dd, src1_dd, dst_dd, main_stream);
858
  }
859
 
860
- inline void ggml_sycl_op_sub(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
861
- ggml_tensor *dst, const float *src0_dd,
862
- const float *src1_dd, float *dst_dd,
863
- const queue_ptr &main_stream) {
864
 
865
- ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_sub>>(ctx, src0, src1, dst, src0_dd, src1_dd, dst_dd, main_stream);
866
  }
867
 
868
- inline void ggml_sycl_op_mul(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
869
- ggml_tensor *dst, const float *src0_dd,
870
- const float *src1_dd, float *dst_dd,
871
- const queue_ptr &main_stream) {
872
 
873
- ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_mul>>(ctx, src0, src1, dst, src0_dd, src1_dd, dst_dd, main_stream);
874
  }
875
 
876
- inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
877
- ggml_tensor *dst, const float *src0_dd,
878
- const float *src1_dd, float *dst_dd,
879
- const queue_ptr &main_stream) {
880
 
881
- ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_div>>(ctx, src0, src1, dst, src0_dd, src1_dd, dst_dd, main_stream);
882
  }
883
 
884
 
885
  void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
886
  GGML_SYCL_DEBUG("call %s\n", __func__);
887
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sqrt);
888
  GGML_SYCL_DEBUG("call %s done\n", __func__);
889
  }
890
 
891
  void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
892
  GGML_SYCL_DEBUG("call %s\n", __func__);
893
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sin);
894
  GGML_SYCL_DEBUG("call %s done\n", __func__);
895
  }
896
 
897
  void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
898
  GGML_SYCL_DEBUG("call %s\n", __func__);
899
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_cos);
900
  GGML_SYCL_DEBUG("call %s done\n", __func__);
901
  }
902
 
903
  void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
904
  GGML_SYCL_DEBUG("call %s\n", __func__);
905
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_acc);
906
  GGML_SYCL_DEBUG("call %s done\n", __func__);
907
  }
908
 
909
  void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
910
  GGML_SYCL_DEBUG("call %s\n", __func__);
911
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_gelu);
912
  GGML_SYCL_DEBUG("call %s done\n", __func__);
913
  }
914
 
915
  void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
916
  GGML_SYCL_DEBUG("call %s\n", __func__);
917
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_silu);
918
  GGML_SYCL_DEBUG("call %s done\n", __func__);
919
  }
920
 
921
  void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
922
  GGML_SYCL_DEBUG("call %s\n", __func__);
923
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_gelu_quick);
924
  GGML_SYCL_DEBUG("call %s done\n", __func__);
925
  }
926
 
927
  void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
928
  GGML_SYCL_DEBUG("call %s\n", __func__);
929
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_tanh);
930
  GGML_SYCL_DEBUG("call %s done\n", __func__);
931
  }
932
 
933
  void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
934
  GGML_SYCL_DEBUG("call %s\n", __func__);
935
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_relu);
936
  GGML_SYCL_DEBUG("call %s done\n", __func__);
937
  }
938
 
939
  void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
940
  GGML_SYCL_DEBUG("call %s\n", __func__);
941
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sigmoid);
942
  GGML_SYCL_DEBUG("call %s done\n", __func__);
943
  }
944
 
945
  void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
946
  GGML_SYCL_DEBUG("call %s\n", __func__);
947
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_hardsigmoid);
948
  GGML_SYCL_DEBUG("call %s done\n", __func__);
949
  }
950
 
951
  void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
952
  GGML_SYCL_DEBUG("call %s\n", __func__);
953
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_hardswish);
954
  GGML_SYCL_DEBUG("call %s done\n", __func__);
955
  }
956
 
957
 
958
  void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
959
  GGML_SYCL_DEBUG("call %s\n", __func__);
960
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_exp);
961
  GGML_SYCL_DEBUG("call %s done\n", __func__);
962
  }
963
 
964
  void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
965
  GGML_SYCL_DEBUG("call %s\n", __func__);
966
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_log);
967
  GGML_SYCL_DEBUG("call %s done\n", __func__);
968
  }
969
 
970
  void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
971
  GGML_SYCL_DEBUG("call %s\n", __func__);
972
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_neg);
973
  GGML_SYCL_DEBUG("call %s done\n", __func__);
974
  }
975
 
976
  void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
977
  GGML_SYCL_DEBUG("call %s\n", __func__);
978
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_step);
979
  GGML_SYCL_DEBUG("call %s done\n", __func__);
980
  }
981
 
982
  void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
983
  GGML_SYCL_DEBUG("call %s\n", __func__);
984
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_leaky_relu);
985
  GGML_SYCL_DEBUG("call %s done\n", __func__);
986
  }
987
 
988
  void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
989
  GGML_SYCL_DEBUG("call %s\n", __func__);
990
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sqr);
991
  GGML_SYCL_DEBUG("call %s done\n", __func__);
992
  }
993
 
994
  void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
995
  GGML_SYCL_DEBUG("call %s\n", __func__);
996
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_upscale);
997
  GGML_SYCL_DEBUG("call %s done\n", __func__);
998
  }
999
 
1000
  void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1001
  GGML_SYCL_DEBUG("call %s\n", __func__);
1002
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_pad);
1003
  GGML_SYCL_DEBUG("call %s done\n", __func__);
1004
  }
1005
 
@@ -1007,24 +919,24 @@ void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1007
 
1008
  void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1009
  GGML_SYCL_DEBUG("call %s\n", __func__);
1010
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_add);
1011
  GGML_SYCL_DEBUG("call %s done\n", __func__);
1012
  }
1013
 
1014
  void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1015
  GGML_SYCL_DEBUG("call %s\n", __func__);
1016
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sub);
1017
  GGML_SYCL_DEBUG("call %s done\n", __func__);
1018
  }
1019
 
1020
  void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1021
  GGML_SYCL_DEBUG("call %s\n", __func__);
1022
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_mul);
1023
  GGML_SYCL_DEBUG("call %s done\n", __func__);
1024
  }
1025
 
1026
  void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
1027
  GGML_SYCL_DEBUG("call %s\n", __func__);
1028
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_div);
1029
  GGML_SYCL_DEBUG("call %s done\n", __func__);
1030
  }
 
509
  });
510
  }
511
 
512
+ inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
 
 
513
 
514
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
515
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
516
+ dpct::queue_ptr main_stream = ctx.stream();
517
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
518
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
519
+ float * dst_dd = static_cast<float *>(dst->data);
520
 
 
521
 
522
+ silu_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 
 
 
523
  }
524
 
525
+ inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
 
 
526
 
527
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
528
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
529
+ dpct::queue_ptr main_stream = ctx.stream();
530
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
531
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
532
+ float * dst_dd = static_cast<float *>(dst->data);
533
 
534
+ gelu_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 
 
 
 
 
535
  }
 
 
 
 
 
536
 
537
+ inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
538
 
539
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
540
+ GGML_ASSERT( dst->type == GGML_TYPE_F32);
541
+ dpct::queue_ptr main_stream = ctx.stream();
542
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
543
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
544
+ float * dst_dd = static_cast<float *>(dst->data);
545
 
546
+ gelu_quick_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 
 
 
547
  }
548
 
549
+ inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
 
 
550
 
551
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
552
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
553
+ dpct::queue_ptr main_stream = ctx.stream();
554
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
555
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
556
+ float * dst_dd = static_cast<float *>(dst->data);
557
+ tanh_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 
558
  }
559
 
560
+ inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
 
 
561
 
562
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
563
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
564
+ dpct::queue_ptr main_stream = ctx.stream();
565
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
566
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
567
+ float * dst_dd = static_cast<float *>(dst->data);
568
 
569
+ relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 
 
 
 
 
570
  }
571
 
572
+ inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
 
 
 
573
 
574
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
575
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
576
+ dpct::queue_ptr main_stream = ctx.stream();
577
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
578
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
579
+ float * dst_dd = static_cast<float *>(dst->data);
580
 
581
+ hardsigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 
 
 
 
 
582
  }
583
 
584
+ inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
 
 
585
 
586
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
587
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
588
+ dpct::queue_ptr main_stream = ctx.stream();
589
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
590
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
591
+ float * dst_dd = static_cast<float *>(dst->data);
592
 
593
+ hardswish_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 
 
 
 
 
594
  }
595
 
596
+ inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
 
 
597
 
598
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
599
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
600
+ dpct::queue_ptr main_stream = ctx.stream();
601
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
602
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
603
+ float * dst_dd = static_cast<float *>(dst->data);
604
 
605
+ exp_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 
 
 
 
 
606
  }
607
 
608
+ inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
 
 
609
 
610
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
611
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
612
+ dpct::queue_ptr main_stream = ctx.stream();
613
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
614
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
615
+ float * dst_dd = static_cast<float *>(dst->data);
616
 
617
+ log_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 
 
 
 
 
618
  }
619
 
620
+ inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
 
 
621
 
622
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
623
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
624
+ dpct::queue_ptr main_stream = ctx.stream();
625
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
626
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
627
+ float * dst_dd = static_cast<float *>(dst->data);
628
 
 
629
 
630
+ sigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 
 
 
631
  }
632
 
633
+ inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
 
 
634
 
635
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
636
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
637
+ dpct::queue_ptr main_stream = ctx.stream();
638
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
639
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
640
+ float * dst_dd = static_cast<float *>(dst->data);
641
 
 
642
 
643
+ sqrt_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 
 
 
644
  }
645
 
646
+ inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
 
 
647
 
648
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
649
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
650
+ dpct::queue_ptr main_stream = ctx.stream();
651
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
652
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
653
+ float * dst_dd = static_cast<float *>(dst->data);
654
 
655
+ sin_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 
 
 
 
 
656
  }
657
 
658
+ inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
 
 
659
 
660
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
661
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
662
+ dpct::queue_ptr main_stream = ctx.stream();
663
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
664
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
665
+ float * dst_dd = static_cast<float *>(dst->data);
666
 
667
+ cos_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 
 
 
 
 
668
  }
669
 
670
+ inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
 
 
671
 
672
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
673
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
674
+ dpct::queue_ptr main_stream = ctx.stream();
675
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
676
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
677
+ float * dst_dd = static_cast<float *>(dst->data);
678
 
679
+ step_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 
 
 
 
 
680
  }
681
 
 
 
 
 
682
 
683
+ inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
684
 
685
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
686
+ GGML_ASSERT( dst->type == GGML_TYPE_F32);
687
+ dpct::queue_ptr main_stream = ctx.stream();
688
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
689
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
690
+ float * dst_dd = static_cast<float *>(dst->data);
691
 
692
+ neg_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 
 
 
693
  }
694
 
695
+ inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
 
 
 
696
 
697
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
698
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
699
+ dpct::queue_ptr main_stream = ctx.stream();
700
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
701
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
702
+ float * dst_dd = static_cast<float *>(dst->data);
703
 
704
  float negative_slope;
705
  memcpy(&negative_slope, dst->op_params, sizeof(float));
706
 
707
+ leaky_relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), negative_slope, main_stream);
 
 
 
 
 
708
  }
709
 
710
+ inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
 
 
711
 
712
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
713
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
714
+ dpct::queue_ptr main_stream = ctx.stream();
715
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
716
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
717
+ float * dst_dd = static_cast<float *>(dst->data);
718
 
719
+ sqr_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 
 
 
 
 
720
  }
721
 
722
+ inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
 
 
 
723
 
724
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
725
  GGML_ASSERT(dst->type == GGML_TYPE_F32);
726
+ dpct::queue_ptr main_stream = ctx.stream();
727
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
728
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
729
+ float * dst_dd = static_cast<float *>(dst->data);
730
 
731
+ const float sf0 = (float)dst->ne[0]/dst->src[0]->ne[0];
732
+ const float sf1 = (float)dst->ne[1]/dst->src[0]->ne[1];
733
+ const float sf2 = (float)dst->ne[2]/dst->src[0]->ne[2];
734
+ const float sf3 = (float)dst->ne[3]/dst->src[0]->ne[3];
735
 
736
+ upscale_f32_sycl(src0_dd, dst_dd, dst->src[0]->nb[0], dst->src[0]->nb[1], dst->src[0]->nb[2], dst->src[0]->nb[3],
737
  dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3,
738
  main_stream);
 
 
 
 
 
739
  }
740
 
741
+ inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
 
 
742
 
743
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
744
  GGML_ASSERT(dst->type == GGML_TYPE_F32);
745
+ GGML_ASSERT(dst->src[0]->ne[3] == 1 && dst->ne[3] == 1); // just 3D tensors
746
+ dpct::queue_ptr main_stream = ctx.stream();
747
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
748
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
749
+ float * dst_dd = static_cast<float *>(dst->data);
750
 
751
  pad_f32_sycl(src0_dd, dst_dd,
752
+ dst->src[0]->ne[0], dst->src[0]->ne[1], dst->src[0]->ne[2],
753
  dst->ne[0], dst->ne[1], dst->ne[2], main_stream);
 
 
 
 
 
754
  }
755
 
756
+ inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
 
 
757
 
758
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
759
+ GGML_ASSERT(dst->src[1]->type == GGML_TYPE_F32);
760
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
761
  GGML_ASSERT(dst->ne[3] == 1); // just 3D tensors supported
762
+ dpct::queue_ptr main_stream = ctx.stream();
763
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
764
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
765
+ const float * src1_dd = static_cast<const float*>(dst->src[1]->data);
766
+ float * dst_dd = static_cast<float *>(dst->data);
767
 
768
  int nb1 = dst->op_params[0] / 4; // 4 bytes of float32
769
  int nb2 = dst->op_params[1] / 4; // 4 bytes of float32
770
  // int nb3 = dst->op_params[2] / 4; // 4 bytes of float32 - unused
771
  int offset = dst->op_params[3] / 4; // offset in bytes
772
 
773
+ acc_f32_sycl(src0_dd, src1_dd, dst_dd, ggml_nelements(dst), dst->src[1]->ne[0], dst->src[1]->ne[1], dst->src[1]->ne[2], nb1, nb2, offset, main_stream);
 
 
 
774
  }
775
 
776
+ inline void ggml_sycl_op_add(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
 
 
777
 
778
+ ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_add>>(ctx, dst->src[0], dst->src[1], dst);
779
  }
780
 
781
+ inline void ggml_sycl_op_sub(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
 
 
782
 
783
+ ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_sub>>(ctx, dst->src[0], dst->src[1], dst);
784
  }
785
 
786
+ inline void ggml_sycl_op_mul(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
 
 
787
 
788
+ ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_mul>>(ctx, dst->src[0], dst->src[1], dst);
789
  }
790
 
791
+ inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
 
 
792
 
793
+ ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_div>>(ctx, dst->src[0], dst->src[1], dst);
794
  }
795
 
796
 
797
  void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
798
  GGML_SYCL_DEBUG("call %s\n", __func__);
799
+ ggml_sycl_op_sqrt(ctx, dst);
800
  GGML_SYCL_DEBUG("call %s done\n", __func__);
801
  }
802
 
803
  void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
804
  GGML_SYCL_DEBUG("call %s\n", __func__);
805
+ ggml_sycl_op_sin(ctx, dst);
806
  GGML_SYCL_DEBUG("call %s done\n", __func__);
807
  }
808
 
809
  void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
810
  GGML_SYCL_DEBUG("call %s\n", __func__);
811
+ ggml_sycl_op_cos(ctx, dst);
812
  GGML_SYCL_DEBUG("call %s done\n", __func__);
813
  }
814
 
815
  void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
816
  GGML_SYCL_DEBUG("call %s\n", __func__);
817
+ ggml_sycl_op_acc(ctx, dst);
818
  GGML_SYCL_DEBUG("call %s done\n", __func__);
819
  }
820
 
821
  void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
822
  GGML_SYCL_DEBUG("call %s\n", __func__);
823
+ ggml_sycl_op_gelu(ctx, dst);
824
  GGML_SYCL_DEBUG("call %s done\n", __func__);
825
  }
826
 
827
  void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
828
  GGML_SYCL_DEBUG("call %s\n", __func__);
829
+ ggml_sycl_op_silu(ctx, dst);
830
  GGML_SYCL_DEBUG("call %s done\n", __func__);
831
  }
832
 
833
  void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
834
  GGML_SYCL_DEBUG("call %s\n", __func__);
835
+ ggml_sycl_op_gelu_quick(ctx, dst);
836
  GGML_SYCL_DEBUG("call %s done\n", __func__);
837
  }
838
 
839
  void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
840
  GGML_SYCL_DEBUG("call %s\n", __func__);
841
+ ggml_sycl_op_tanh(ctx, dst);
842
  GGML_SYCL_DEBUG("call %s done\n", __func__);
843
  }
844
 
845
  void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
846
  GGML_SYCL_DEBUG("call %s\n", __func__);
847
+ ggml_sycl_op_relu(ctx, dst);
848
  GGML_SYCL_DEBUG("call %s done\n", __func__);
849
  }
850
 
851
  void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
852
  GGML_SYCL_DEBUG("call %s\n", __func__);
853
+ ggml_sycl_op_sigmoid(ctx, dst);
854
  GGML_SYCL_DEBUG("call %s done\n", __func__);
855
  }
856
 
857
  void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
858
  GGML_SYCL_DEBUG("call %s\n", __func__);
859
+ ggml_sycl_op_hardsigmoid(ctx, dst);
860
  GGML_SYCL_DEBUG("call %s done\n", __func__);
861
  }
862
 
863
  void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
864
  GGML_SYCL_DEBUG("call %s\n", __func__);
865
+ ggml_sycl_op_hardswish(ctx, dst);
866
  GGML_SYCL_DEBUG("call %s done\n", __func__);
867
  }
868
 
869
 
870
  void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
871
  GGML_SYCL_DEBUG("call %s\n", __func__);
872
+ ggml_sycl_op_exp(ctx, dst);
873
  GGML_SYCL_DEBUG("call %s done\n", __func__);
874
  }
875
 
876
  void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
877
  GGML_SYCL_DEBUG("call %s\n", __func__);
878
+ ggml_sycl_op_log(ctx, dst);
879
  GGML_SYCL_DEBUG("call %s done\n", __func__);
880
  }
881
 
882
  void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
883
  GGML_SYCL_DEBUG("call %s\n", __func__);
884
+ ggml_sycl_op_neg(ctx, dst);
885
  GGML_SYCL_DEBUG("call %s done\n", __func__);
886
  }
887
 
888
  void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
889
  GGML_SYCL_DEBUG("call %s\n", __func__);
890
+ ggml_sycl_op_step(ctx, dst);
891
  GGML_SYCL_DEBUG("call %s done\n", __func__);
892
  }
893
 
894
  void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
895
  GGML_SYCL_DEBUG("call %s\n", __func__);
896
+ ggml_sycl_op_leaky_relu(ctx, dst);
897
  GGML_SYCL_DEBUG("call %s done\n", __func__);
898
  }
899
 
900
  void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
901
  GGML_SYCL_DEBUG("call %s\n", __func__);
902
+ ggml_sycl_op_sqr(ctx, dst);
903
  GGML_SYCL_DEBUG("call %s done\n", __func__);
904
  }
905
 
906
  void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
907
  GGML_SYCL_DEBUG("call %s\n", __func__);
908
+ ggml_sycl_op_upscale(ctx, dst);
909
  GGML_SYCL_DEBUG("call %s done\n", __func__);
910
  }
911
 
912
  void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
913
  GGML_SYCL_DEBUG("call %s\n", __func__);
914
+ ggml_sycl_op_pad(ctx, dst);
915
  GGML_SYCL_DEBUG("call %s done\n", __func__);
916
  }
917
 
 
919
 
920
  void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
921
  GGML_SYCL_DEBUG("call %s\n", __func__);
922
+ ggml_sycl_op_add(ctx, dst);
923
  GGML_SYCL_DEBUG("call %s done\n", __func__);
924
  }
925
 
926
  void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
927
  GGML_SYCL_DEBUG("call %s\n", __func__);
928
+ ggml_sycl_op_sub(ctx, dst);
929
  GGML_SYCL_DEBUG("call %s done\n", __func__);
930
  }
931
 
932
  void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
933
  GGML_SYCL_DEBUG("call %s\n", __func__);
934
+ ggml_sycl_op_mul(ctx, dst);
935
  GGML_SYCL_DEBUG("call %s done\n", __func__);
936
  }
937
 
938
  void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
939
  GGML_SYCL_DEBUG("call %s\n", __func__);
940
+ ggml_sycl_op_div(ctx, dst);
941
  GGML_SYCL_DEBUG("call %s done\n", __func__);
942
  }
ggml/src/ggml-sycl/getrows.cpp CHANGED
@@ -257,50 +257,54 @@ static void get_rows_sycl_float(ggml_backend_sycl_context & ctx, const ggml_tens
     GGML_UNUSED(ctx);
 }
 
-void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                           const ggml_tensor *src1, ggml_tensor *dst,
-                           const float *src0_d, const float *src1_d,
-                           float *dst_d, const queue_ptr &stream) {
+void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
-    GGML_ASSERT(src1->type == GGML_TYPE_I32);
+    GGML_ASSERT(dst->src[1]->type == GGML_TYPE_I32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
 
-    GGML_ASSERT(src0->nb[0] == ggml_type_size(src0->type));
-    GGML_ASSERT(src1->nb[0] == ggml_type_size(src1->type));
+    GGML_ASSERT(dst->src[0]->nb[0] == ggml_type_size(dst->src[0]->type));
+    GGML_ASSERT(dst->src[1]->nb[0] == ggml_type_size(dst->src[1]->type));
     GGML_ASSERT(dst->nb[0] == ggml_type_size(dst->type));
 
-    const int32_t * src1_i32 = (const int32_t *) src1_d;
-
-    switch (src0->type) {
+    const int32_t * src1_i32 = (const int32_t *) dst->src[1]->data;
+    /* TODO: Refactor and remove duplicates */
+    switch (dst->src[0]->type) {
        case GGML_TYPE_F16:
-            get_rows_sycl_float(ctx, src0, src1, dst, (const sycl::half *)src0_d,
-                                src1_i32, dst_d, stream);
+            get_rows_sycl_float(ctx, dst->src[0], dst->src[1], dst, (const sycl::half *)dst->src[0]->data,
+                                src1_i32, (float *)dst->data, ctx.stream());
            break;
        case GGML_TYPE_F32:
-            get_rows_sycl_float(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream);
+            get_rows_sycl_float(ctx, dst->src[0], dst->src[1], dst, (const float *)dst->src[0]->data,
+                                src1_i32, (float *)dst->data, ctx.stream());
            break;
        case GGML_TYPE_Q4_0:
            if (ctx.opt_feature.reorder && dst->op == GGML_OP_MUL_MAT) {
-                get_rows_sycl_reorder<QK4_0, QR4_0, dequantize_q4_0_reorder>(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream);
+                get_rows_sycl_reorder<QK4_0, QR4_0, dequantize_q4_0_reorder>(ctx, dst->src[0], dst->src[1], dst, (const float *)dst->src[0]->data,
+                                                                             src1_i32, (float *)dst->data, ctx.stream());
            } else {
-                get_rows_sycl<QK4_0, QR4_0, dequantize_q4_0>(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream);
+                get_rows_sycl<QK4_0, QR4_0, dequantize_q4_0>(ctx, dst->src[0], dst->src[1], dst, (const float *)dst->src[0]->data,
+                                                             src1_i32, (float *)dst->data, ctx.stream());
            }
            break;
        case GGML_TYPE_Q4_1:
-            get_rows_sycl<QK4_1, QR4_1, dequantize_q4_1>(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream);
+            get_rows_sycl<QK4_1, QR4_1, dequantize_q4_1>(ctx, dst->src[0], dst->src[1], dst, (const float *)dst->src[0]->data,
+                                                         src1_i32, (float *)dst->data, ctx.stream());
            break;
        case GGML_TYPE_Q5_0:
-            get_rows_sycl<QK5_0, QR5_0, dequantize_q5_0>(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream);
+            get_rows_sycl<QK5_0, QR5_0, dequantize_q5_0>(ctx, dst->src[0], dst->src[1], dst, (const float *)dst->src[0]->data,
+                                                         src1_i32, (float *)dst->data, ctx.stream());
            break;
        case GGML_TYPE_Q5_1:
-            get_rows_sycl<QK5_1, QR5_1, dequantize_q5_1>(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream);
+            get_rows_sycl<QK5_1, QR5_1, dequantize_q5_1>(ctx, dst->src[0], dst->src[1], dst, (const float *)dst->src[0]->data,
+                                                         src1_i32, (float *)dst->data, ctx.stream());
            break;
        case GGML_TYPE_Q8_0:
-            get_rows_sycl<QK8_0, QR8_0, dequantize_q8_0>(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream);
+            get_rows_sycl<QK8_0, QR8_0, dequantize_q8_0>(ctx, dst->src[0], dst->src[1], dst, (const float *)dst->src[0]->data,
+                                                         src1_i32, (float *)dst->data, ctx.stream());
            break;
        default:
            // TODO: k-quants
-            GGML_LOG_ERROR("%s: unsupported type: %s\n", __func__, ggml_type_name(src0->type));
+            GGML_LOG_ERROR("%s: unsupported type: %s\n", __func__, ggml_type_name(dst->src[0]->type));
            GGML_ABORT("fatal error");
    }
 }
ggml/src/ggml-sycl/getrows.hpp CHANGED
@@ -15,9 +15,6 @@
 
 #include "common.hpp"
 
-void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                           const ggml_tensor *src1, ggml_tensor *dst,
-                           const float *src0_d, const float *src1_d,
-                           float *dst_d, const queue_ptr &stream);
+void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor *dst);
 
 #endif // GGML_SYCL_GETROWS_HPP
ggml/src/ggml-sycl/ggml-sycl.cpp CHANGED
@@ -1988,16 +1988,8 @@ catch (sycl::exception const &exc) {
1988
  std::exit(1);
1989
  }
1990
 
1991
- static void ggml_sycl_op_repeat(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
1992
- const ggml_tensor *src1, ggml_tensor *dst,
1993
- const float *src0_d, const float *src1_d,
1994
- float *dst_d,
1995
- const queue_ptr &main_stream) {
1996
-
1997
- ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_repeat>>(ctx, dst, src0, dst, nullptr, src0_d, dst_d, main_stream);
1998
-
1999
- GGML_UNUSED(src1);
2000
- GGML_UNUSED(src1_d);
2001
  }
2002
 
2003
 
@@ -2132,13 +2124,14 @@ catch (sycl::exception const &exc) {
2132
  std::exit(1);
2133
  }
2134
 
2135
- static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
2136
- const ggml_tensor *src1, ggml_tensor *dst,
2137
- const float *src0_dd, const float *src1_dd,
2138
- float *dst_dd, const queue_ptr &main_stream) {
2139
 
2140
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
2141
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
 
 
 
2142
 
2143
  const int32_t * opts = (const int32_t *)dst->op_params;
2144
  enum ggml_op_pool op = static_cast<ggml_op_pool>(opts[0]);
@@ -2149,8 +2142,8 @@ static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, const ggml_tens
2149
  const int p0 = opts[5];
2150
  const int p1 = opts[6];
2151
 
2152
- const int64_t IH = src0->ne[1];
2153
- const int64_t IW = src0->ne[0];
2154
 
2155
  const int64_t N = dst->ne[3];
2156
  const int64_t OC = dst->ne[2];
@@ -2169,163 +2162,125 @@ static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, const ggml_tens
2169
  parallel_elements, src0_dd, dst_dd, op,
2170
  item_ct1);
2171
  });
2172
-
2173
- GGML_UNUSED(src1);
2174
- GGML_UNUSED(src1_dd);
2175
- GGML_UNUSED(ctx);
2176
  }
2177
 
2178
- inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
2179
- const ggml_tensor *src1, ggml_tensor *dst,
2180
- const float *src0_dd, const float *src1_dd,
- float *dst_dd,
- const queue_ptr &main_stream) {
- GGML_ASSERT(src0->type == GGML_TYPE_F32);
  GGML_ASSERT( dst->type == GGML_TYPE_F32);

- const int64_t ne = ggml_nelements(src0);

  sum_rows_f32_sycl(src0_dd, dst_dd, ne, 1, main_stream);
-
- GGML_UNUSED(src1);
- GGML_UNUSED(dst);
- GGML_UNUSED(src1_dd);
- GGML_UNUSED(ctx);
  }

- inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
- const ggml_tensor *src1, ggml_tensor *dst,
- const float *src0_dd, const float *src1_dd,
- float *dst_dd,
- const queue_ptr &main_stream) {

- GGML_ASSERT(src0->type == GGML_TYPE_F32);
  GGML_ASSERT( dst->type == GGML_TYPE_F32);

- const int64_t ncols = src0->ne[0];
- const int64_t nrows = ggml_nrows(src0);

  sum_rows_f32_sycl(src0_dd, dst_dd, ncols, nrows, main_stream);
-
- GGML_UNUSED(src1);
- GGML_UNUSED(dst);
- GGML_UNUSED(src1_dd);
- GGML_UNUSED(ctx);
  }

- inline void ggml_sycl_op_argsort(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
- const ggml_tensor *src1, ggml_tensor *dst,
- const float *src0_dd, const float *src1_dd,
- float *dst_dd,
- const queue_ptr &main_stream) {

- GGML_ASSERT(src0->type == GGML_TYPE_F32);
- GGML_ASSERT( dst->type == GGML_TYPE_I32);

- const int64_t ncols = src0->ne[0];
- const int64_t nrows = ggml_nrows(src0);

  enum ggml_sort_order order = (enum ggml_sort_order) dst->op_params[0];

- argsort_f32_i32_sycl(src0_dd, (int *)dst_dd, ncols, nrows, order, main_stream);
-
- GGML_UNUSED(src1);
- GGML_UNUSED(dst);
- GGML_UNUSED(src1_dd);
- GGML_UNUSED(ctx);
  }

- inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
- const ggml_tensor *src1, ggml_tensor *dst,
- const float *src0_dd, const float *src1_dd,
- float *dst_dd,
- const queue_ptr &main_stream) {

- GGML_ASSERT(src0->type == GGML_TYPE_F32);
  GGML_ASSERT( dst->type == GGML_TYPE_I32);

- const int64_t ncols = src0->ne[0];
- const int64_t nrows = ggml_nrows(src0);

- argmax_f32_i32_sycl(src0_dd, (int *)dst_dd, ncols, nrows, main_stream);

- GGML_UNUSED(src1);
- GGML_UNUSED(dst);
- GGML_UNUSED(src1_dd);
- GGML_UNUSED(ctx);
  }

- inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
- const ggml_tensor *src1,
- ggml_tensor *dst, const float *src0_dd,
- const float *src1_dd, float *dst_dd,
- const queue_ptr &main_stream) {

- GGML_ASSERT(src0->type == GGML_TYPE_F32);
  GGML_ASSERT( dst->type == GGML_TYPE_F32);

- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int nrows0 = ggml_nrows(src0);

  const int n_past = ((int32_t *) dst->op_params)[0];

  diag_mask_inf_f32_sycl(src0_dd, dst_dd, ne00, nrows0, ne01, n_past, main_stream);
-
- GGML_UNUSED(src1);
- GGML_UNUSED(dst);
- GGML_UNUSED(src1_dd);
- GGML_UNUSED(ctx);
  }

- inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
- ggml_tensor *dst, const float *src0_dd,
- const float *src1_dd, float *dst_dd,
- const queue_ptr &main_stream) {

- GGML_ASSERT(src0->type == GGML_TYPE_F32);
  GGML_ASSERT( dst->type == GGML_TYPE_F32);

  float scale;
  memcpy(&scale, dst->op_params, sizeof(float));

- scale_f32_sycl(src0_dd, dst_dd, scale, ggml_nelements(src0), main_stream);
  /*
  DPCT1010:87: SYCL uses exceptions to report errors and does not use the
  error codes. The call was replaced with 0. You need to rewrite this code.
  */
  SYCL_CHECK(0);
-
- GGML_UNUSED(src1);
- GGML_UNUSED(dst);
- GGML_UNUSED(src1_dd);
- GGML_UNUSED(ctx);
  }

- inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
- ggml_tensor *dst, const float *src0_dd,
- const float *src1_dd, float *dst_dd,
- const queue_ptr &main_stream) {

- GGML_ASSERT(src0->type == GGML_TYPE_F32);
  GGML_ASSERT( dst->type == GGML_TYPE_F32);

  float min;
  float max;
  memcpy(&min, dst->op_params, sizeof(float));
  memcpy(&max, (float *) dst->op_params + 1, sizeof(float));

- clamp_f32_sycl(src0_dd, dst_dd, min, max, ggml_nelements(src0), main_stream);
  /*
  DPCT1010:88: SYCL uses exceptions to report errors and does not use the
  error codes. The call was replaced with 0. You need to rewrite this code.
  */
  SYCL_CHECK(0);
-
- GGML_UNUSED(src1);
- GGML_UNUSED(dst);
- GGML_UNUSED(src1_dd);
- GGML_UNUSED(ctx);
  }

  static void ggml_sycl_set_peer_access(const int n_tokens, int main_device) {
@@ -2695,37 +2650,37 @@ catch (sycl::exception const &exc) {

  static void ggml_sycl_repeat(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
  GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_repeat);
  GGML_SYCL_DEBUG("call %s done\n", __func__);
  }

  static void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
  GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_get_rows);
  GGML_SYCL_DEBUG("call %s done\n", __func__);
  }

  static void ggml_sycl_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
  GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_norm);
  GGML_SYCL_DEBUG("call %s done\n", __func__);
  }

  static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
  GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_rms_norm);
  GGML_SYCL_DEBUG("call %s done\n", __func__);
  }

  static void ggml_sycl_l2_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
  GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_l2_norm);
  GGML_SYCL_DEBUG("call %s done\n", __func__);
  }

  static void ggml_sycl_group_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
  GGML_SYCL_DEBUG("call %s\n", __func__);
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_group_norm);
  GGML_SYCL_DEBUG("call %s done\n", __func__);
  }

@@ -3269,48 +3224,48 @@ catch (sycl::exception const &exc) {
  }

  static void ggml_sycl_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_scale);
  }

  static void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_clamp);
  }

  static void ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_diag_mask_inf);
  }

  static void ggml_sycl_rope(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
  GGML_ASSERT(ggml_is_contiguous(dst->src[0])); // TODO: this restriction is temporary until non-cont support is implemented
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_rope);
  }

  static void ggml_sycl_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_pool2d);
  }

  static void ggml_sycl_im2col(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_im2col);
  }

  static void ggml_sycl_sum(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
  GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sum);
  }

  static void ggml_sycl_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
  GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sum_rows);
  }

  static void ggml_sycl_argsort(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
  GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_argsort);
  }

  static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
  GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
- ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_argmax);
  }

@@ -3335,7 +3290,7 @@ catch (sycl::exception const &exc) {
  std::exit(1);
  }

- static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tensor * dst) {
  if (!g_sycl_loaded) return false;

  if (dst->src[0] != nullptr && ggml_backend_buffer_is_sycl_split(dst->src[0]->buffer)) {
@@ -3528,6 +3483,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg
  }

  return true;
  }

  GGML_API void ggml_backend_sycl_get_device_description(int device, char *description,
 
  std::exit(1);
  }

+ static void ggml_sycl_op_repeat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+ ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_repeat>>(ctx, dst, dst->src[0], dst);
  }

  std::exit(1);
  }

+ static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {

+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
+ dpct::queue_ptr main_stream = ctx.stream();
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+ float * dst_dd = static_cast<float *>(dst->data);

  const int32_t * opts = (const int32_t *)dst->op_params;
  enum ggml_op_pool op = static_cast<ggml_op_pool>(opts[0]);

  const int p0 = opts[5];
  const int p1 = opts[6];

+ const int64_t IH = dst->src[0]->ne[1];
+ const int64_t IW = dst->src[0]->ne[0];

  const int64_t N = dst->ne[3];
  const int64_t OC = dst->ne[2];

  parallel_elements, src0_dd, dst_dd, op,
  item_ct1);
  });
  }

+ inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
+ dpct::queue_ptr main_stream = ctx.stream();
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+ float * dst_dd = static_cast<float *>(dst->data);

+ const int64_t ne = ggml_nelements(dst->src[0]);

  sum_rows_f32_sycl(src0_dd, dst_dd, ne, 1, main_stream);
  }

+ inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {

+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
+ dpct::queue_ptr main_stream = ctx.stream();
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+ float * dst_dd = static_cast<float *>(dst->data);

+ const int64_t ncols = dst->src[0]->ne[0];
+ const int64_t nrows = ggml_nrows(dst->src[0]);

  sum_rows_f32_sycl(src0_dd, dst_dd, ncols, nrows, main_stream);
  }

+ inline void ggml_sycl_op_argsort(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
+ GGML_ASSERT(dst->type == GGML_TYPE_I32);
+ dpct::queue_ptr main_stream = ctx.stream();
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+ int32_t * dst_dd = static_cast<int32_t *>(dst->data);

+ const int64_t ncols = dst->src[0]->ne[0];
+ const int64_t nrows = ggml_nrows(dst->src[0]);

  enum ggml_sort_order order = (enum ggml_sort_order) dst->op_params[0];

+ argsort_f32_i32_sycl(src0_dd, (int *) dst_dd, ncols, nrows, order, main_stream);
  }

+ inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {

+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
  GGML_ASSERT( dst->type == GGML_TYPE_I32);

+ dpct::queue_ptr main_stream = ctx.stream();
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+ int32_t * dst_dd = static_cast<int32_t *>(dst->data);

+ const int64_t ncols = dst->src[0]->ne[0];
+ const int64_t nrows = ggml_nrows(dst->src[0]);

+ argmax_f32_i32_sycl(src0_dd, dst_dd, ncols, nrows, main_stream);
  }

+ inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx,ggml_tensor *dst) {

+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
+ dpct::queue_ptr main_stream = ctx.stream();
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+ float * dst_dd = static_cast<float *>(dst->data);

+ const int64_t ne00 = dst->src[0]->ne[0];
+ const int64_t ne01 = dst->src[0]->ne[1];
+ const int nrows0 = ggml_nrows(dst->src[0]);

  const int n_past = ((int32_t *) dst->op_params)[0];

  diag_mask_inf_f32_sycl(src0_dd, dst_dd, ne00, nrows0, ne01, n_past, main_stream);
  }

+ inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {

+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
+ dpct::queue_ptr main_stream = ctx.stream();
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+ float * dst_dd = static_cast<float *>(dst->data);

  float scale;
  memcpy(&scale, dst->op_params, sizeof(float));

+ scale_f32_sycl(src0_dd, dst_dd, scale, ggml_nelements(dst->src[0]), main_stream);
  /*
  DPCT1010:87: SYCL uses exceptions to report errors and does not use the
  error codes. The call was replaced with 0. You need to rewrite this code.
  */
  SYCL_CHECK(0);
  }

+ inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {

+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+ float * dst_dd = static_cast<float *>(dst->data);

  float min;
  float max;
  memcpy(&min, dst->op_params, sizeof(float));
  memcpy(&max, (float *) dst->op_params + 1, sizeof(float));

+ clamp_f32_sycl(src0_dd, dst_dd, min, max, ggml_nelements(dst->src[0]), ctx.stream());
  /*
  DPCT1010:88: SYCL uses exceptions to report errors and does not use the
  error codes. The call was replaced with 0. You need to rewrite this code.
  */
  SYCL_CHECK(0);
  }

  static void ggml_sycl_set_peer_access(const int n_tokens, int main_device) {

  static void ggml_sycl_repeat(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
  GGML_SYCL_DEBUG("call %s\n", __func__);
+ ggml_sycl_op_repeat(ctx, dst);
  GGML_SYCL_DEBUG("call %s done\n", __func__);
  }

  static void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
  GGML_SYCL_DEBUG("call %s\n", __func__);
+ ggml_sycl_op_get_rows(ctx, dst);
  GGML_SYCL_DEBUG("call %s done\n", __func__);
  }

  static void ggml_sycl_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
  GGML_SYCL_DEBUG("call %s\n", __func__);
+ ggml_sycl_op_norm(ctx, dst);
  GGML_SYCL_DEBUG("call %s done\n", __func__);
  }

  static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
  GGML_SYCL_DEBUG("call %s\n", __func__);
+ ggml_sycl_op_rms_norm(ctx, dst);
  GGML_SYCL_DEBUG("call %s done\n", __func__);
  }

  static void ggml_sycl_l2_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
  GGML_SYCL_DEBUG("call %s\n", __func__);
+ ggml_sycl_op_l2_norm(ctx, dst);
  GGML_SYCL_DEBUG("call %s done\n", __func__);
  }

  static void ggml_sycl_group_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
  GGML_SYCL_DEBUG("call %s\n", __func__);
+ ggml_sycl_op_group_norm(ctx, dst);
  GGML_SYCL_DEBUG("call %s done\n", __func__);
  }

  }

  static void ggml_sycl_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ ggml_sycl_op_scale(ctx, dst);
  }

  static void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ ggml_sycl_op_clamp(ctx, dst);
  }

  static void ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ ggml_sycl_op_diag_mask_inf(ctx, dst);
  }

  static void ggml_sycl_rope(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
  GGML_ASSERT(ggml_is_contiguous(dst->src[0])); // TODO: this restriction is temporary until non-cont support is implemented
+ ggml_sycl_op_rope(ctx, dst);
  }

  static void ggml_sycl_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ ggml_sycl_op_pool2d(ctx, dst);
  }

  static void ggml_sycl_im2col(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+ ggml_sycl_op_im2col(ctx, dst);
  }

  static void ggml_sycl_sum(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
  GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
+ ggml_sycl_op_sum(ctx, dst);
  }

  static void ggml_sycl_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
  GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
+ ggml_sycl_op_sum_rows(ctx, dst);
  }

  static void ggml_sycl_argsort(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
  GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
+ ggml_sycl_op_argsort(ctx, dst);
  }

  static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
  GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
+ ggml_sycl_op_argmax(ctx, dst);
  }

  std::exit(1);
  }

+ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tensor * dst) try {
  if (!g_sycl_loaded) return false;

  if (dst->src[0] != nullptr && ggml_backend_buffer_is_sycl_split(dst->src[0]->buffer)) {

  }

  return true;
+ } catch (sycl::exception & e) {
+ std::cerr << e.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
+ std::exit(1);
  }

  GGML_API void ggml_backend_sycl_get_device_description(int device, char *description,
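
Reviewer note: the pattern above is uniform across the backend. Each op now receives only the backend context and the destination tensor, reads its inputs from dst->src[...], and obtains the queue and device from ctx, so the removed ggml_sycl_op_flatten trampoline is no longer needed and exceptions are caught once in ggml_sycl_compute_forward. A minimal sketch of the convention, using a hypothetical ggml_sycl_op_example (not part of this commit) and assuming the usual helpers from common.hpp:

    // Hypothetical op written in the (ctx, dst) style introduced by this commit.
    static void ggml_sycl_op_example(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
        GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
        GGML_ASSERT(dst->type == GGML_TYPE_F32);
        dpct::queue_ptr main_stream = ctx.stream();          // queue comes from the context now
        SYCL_CHECK(ggml_sycl_set_device(ctx.device));
        const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
        float       * dst_dd  = static_cast<float *>(dst->data);
        // ... launch the kernel over ggml_nelements(dst->src[0]) elements on main_stream ...
    }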
ggml/src/ggml-sycl/im2col.cpp CHANGED
@@ -82,10 +82,9 @@ static void im2col_sycl(
  }
  }

- void ggml_sycl_op_im2col(
- ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
- ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd,
- const queue_ptr &main_stream) {

  GGML_ASSERT(src0->type == GGML_TYPE_F16);
  GGML_ASSERT(src1->type == GGML_TYPE_F32);
@@ -115,12 +114,8 @@ void ggml_sycl_op_im2col(
  const size_t batch_offset = src1->nb[3] / 4; // nb is byte offset, src is type float32

  if (dst->type == GGML_TYPE_F16) {
- im2col_sycl(src1_dd, (sycl::half *)dst_dd, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, main_stream);
  } else {
- im2col_sycl(src1_dd, (float *)dst_dd, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, main_stream);
  }
-
- GGML_UNUSED(src0);
- GGML_UNUSED(src0_dd);
- GGML_UNUSED(ctx);
  }

  }
  }

+ void ggml_sycl_op_im2col(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+ const ggml_tensor * src0 = dst->src[0];
+ const ggml_tensor * src1 = dst->src[1];

  GGML_ASSERT(src0->type == GGML_TYPE_F16);
  GGML_ASSERT(src1->type == GGML_TYPE_F32);

  const size_t batch_offset = src1->nb[3] / 4; // nb is byte offset, src is type float32

  if (dst->type == GGML_TYPE_F16) {
+ im2col_sycl((const float *) src1->data, (sycl::half *)dst->data, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, ctx.stream());
  } else {
+ im2col_sycl((const float *) src1->data, (float *)dst->data, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, ctx.stream());
  }
  }
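
Note: ggml_sycl_op_im2col now locates its operands through dst->src and takes the queue from ctx.stream(), so the dispatcher in ggml-sycl.cpp (shown earlier) reduces to a single forwarding call, reproduced here for reference:

    static void ggml_sycl_im2col(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
        ggml_sycl_op_im2col(ctx, dst);   // src0 (F16) and src1 (F32) are read from dst->src inside the op
    }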
ggml/src/ggml-sycl/im2col.hpp CHANGED
@@ -16,8 +16,6 @@
  #include "common.hpp"

  void ggml_sycl_op_im2col(
- ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
- ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd,
- const queue_ptr &main_stream);

  #endif // GGML_SYCL_IM2COL_HPP

  #include "common.hpp"

  void ggml_sycl_op_im2col(
+ ggml_backend_sycl_context & ctx, ggml_tensor *dst);

  #endif // GGML_SYCL_IM2COL_HPP
ggml/src/ggml-sycl/norm.cpp CHANGED
@@ -397,90 +397,78 @@ static void l2_norm_f32_sycl(const float* x, float* dst, const int ncols,
  }
  }

- void ggml_sycl_op_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0, const ggml_tensor* src1,
- ggml_tensor* dst, const float* src0_dd,
- const float* src1_dd, float* dst_dd,
- const queue_ptr& main_stream) {

- GGML_ASSERT(src0->type == GGML_TYPE_F32);
  GGML_ASSERT(dst->type == GGML_TYPE_F32);

- const int64_t ne00 = src0->ne[0];
- const int64_t nrows = ggml_nrows(src0);

  float eps;
  memcpy(&eps, dst->op_params, sizeof(float));

  norm_f32_sycl(src0_dd, dst_dd, ne00, nrows, eps, main_stream, ctx.device);
-
- (void)src1;
- (void)dst;
- (void)src1_dd;
  }

- void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
- const ggml_tensor* src1, ggml_tensor* dst,
- const float* src0_dd, const float* src1_dd,
- float* dst_dd,
- const queue_ptr& main_stream) {

- GGML_ASSERT(src0->type == GGML_TYPE_F32);
  GGML_ASSERT(dst->type == GGML_TYPE_F32);

  int num_groups = dst->op_params[0];

  float eps;
  memcpy(&eps, dst->op_params + 1, sizeof(float));

- int group_size = src0->ne[0] * src0->ne[1] * ((src0->ne[2] + num_groups - 1) / num_groups);
- group_norm_f32_sycl(src0_dd, dst_dd, num_groups, eps, group_size, src0->ne[0] * src0->ne[1] * src0->ne[2], main_stream, ctx.device);
-
- (void)src1;
- (void)dst;
- (void)src1_dd;
- GGML_UNUSED(ctx);
  }

- void ggml_sycl_op_rms_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
- const ggml_tensor* src1, ggml_tensor* dst,
- const float* src0_dd, const float* src1_dd,
- float* dst_dd,
- const queue_ptr& main_stream) {

- GGML_ASSERT(src0->type == GGML_TYPE_F32);
  GGML_ASSERT(dst->type == GGML_TYPE_F32);

- const int64_t ne00 = src0->ne[0];
- const int64_t nrows = ggml_nrows(src0);

  float eps;
  memcpy(&eps, dst->op_params, sizeof(float));

  rms_norm_f32_sycl(src0_dd, dst_dd, ne00, nrows, eps, main_stream, ctx.device);
-
- (void)src1;
- (void)dst;
- (void)src1_dd;
  }

- void ggml_sycl_op_l2_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
- const ggml_tensor* src1, ggml_tensor* dst,
- const float* src0_dd, const float* src1_dd,
- float* dst_dd,
- const queue_ptr& main_stream) {

- GGML_ASSERT(src0->type == GGML_TYPE_F32);
  GGML_ASSERT(dst->type == GGML_TYPE_F32);

- const int64_t ne00 = src0->ne[0];
- const int64_t nrows = ggml_nrows(src0);

  float eps;
  memcpy(&eps, dst->op_params, sizeof(float));

  l2_norm_f32_sycl(src0_dd, dst_dd, ne00, nrows, eps, main_stream, ctx.device);

- (void)src1;
- (void)dst;
- (void)src1_dd;
  }

  }
  }

+ void ggml_sycl_op_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {

+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
  GGML_ASSERT(dst->type == GGML_TYPE_F32);

+ const int64_t ne00 = dst->src[0]->ne[0];
+ const int64_t nrows = ggml_nrows(dst->src[0]);
+ dpct::queue_ptr main_stream = ctx.stream();
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+ float * dst_dd = static_cast<float *>(dst->data);

  float eps;
  memcpy(&eps, dst->op_params, sizeof(float));

  norm_f32_sycl(src0_dd, dst_dd, ne00, nrows, eps, main_stream, ctx.device);
  }

+ void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {

+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
  GGML_ASSERT(dst->type == GGML_TYPE_F32);

  int num_groups = dst->op_params[0];
+ dpct::queue_ptr main_stream = ctx.stream();
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
+
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+ float * dst_dd = static_cast<float *>(dst->data);

  float eps;
  memcpy(&eps, dst->op_params + 1, sizeof(float));

+ int group_size = dst->src[0]->ne[0] * dst->src[0]->ne[1] * ((dst->src[0]->ne[2] + num_groups - 1) / num_groups);
+ group_norm_f32_sycl(src0_dd, dst_dd, num_groups, eps, group_size, dst->src[0]->ne[0] * dst->src[0]->ne[1] * dst->src[0]->ne[2], main_stream, ctx.device);
  }

+ void ggml_sycl_op_rms_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {

+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
  GGML_ASSERT(dst->type == GGML_TYPE_F32);

+ const int64_t ne00 = dst->src[0]->ne[0];
+ const int64_t nrows = ggml_nrows(dst->src[0]);
+ dpct::queue_ptr main_stream = ctx.stream();
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
+
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+ float * dst_dd = static_cast<float *>(dst->data);

  float eps;
  memcpy(&eps, dst->op_params, sizeof(float));

  rms_norm_f32_sycl(src0_dd, dst_dd, ne00, nrows, eps, main_stream, ctx.device);
  }

+ void ggml_sycl_op_l2_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {

+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
  GGML_ASSERT(dst->type == GGML_TYPE_F32);

+ dpct::queue_ptr main_stream = ctx.stream();
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
+
+ const int64_t ne00 = dst->src[0]->ne[0];
+ const int64_t nrows = ggml_nrows(dst->src[0]);
+ const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+ float * dst_dd = static_cast<float *>(dst->data);

  float eps;
  memcpy(&eps, dst->op_params, sizeof(float));

  l2_norm_f32_sycl(src0_dd, dst_dd, ne00, nrows, eps, main_stream, ctx.device);

  }
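
Note: the four norm variants now share the same prologue: dimensions come from dst->src[0], eps from dst->op_params, and the stream and device from the context. A condensed sketch of that shared prologue, as it appears with small variations in each function above:

    const ggml_tensor * src0 = dst->src[0];
    const int64_t ne00  = src0->ne[0];                 // row length
    const int64_t nrows = ggml_nrows(src0);            // number of rows
    dpct::queue_ptr main_stream = ctx.stream();
    SYCL_CHECK(ggml_sycl_set_device(ctx.device));
    const float * src0_dd = static_cast<const float *>(src0->data);
    float       * dst_dd  = static_cast<float *>(dst->data);
    float eps;
    memcpy(&eps, dst->op_params, sizeof(float));       // group_norm reads eps from op_params + 1 instead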
ggml/src/ggml-sycl/norm.hpp CHANGED
@@ -15,27 +15,12 @@

  #include "common.hpp"

- void ggml_sycl_op_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0, const ggml_tensor* src1,
- ggml_tensor* dst, const float* src0_dd,
- const float* src1_dd, float* dst_dd,
- const queue_ptr& main_stream);
-
- void ggml_sycl_op_rms_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
- const ggml_tensor* src1, ggml_tensor* dst,
- const float* src0_dd, const float* src1_dd,
- float* dst_dd,
- const queue_ptr& main_stream);
-
- void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
- const ggml_tensor* src1, ggml_tensor* dst,
- const float* src0_dd, const float* src1_dd,
- float* dst_dd,
- const queue_ptr& main_stream);
-
- void ggml_sycl_op_l2_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
- const ggml_tensor* src1, ggml_tensor* dst,
- const float* src0_dd, const float* src1_dd,
- float* dst_dd,
- const queue_ptr& main_stream);

  #endif // GGML_SYCL_NORM_HPP

  #include "common.hpp"

+ void ggml_sycl_op_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst);
+
+ void ggml_sycl_op_rms_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst);
+
+ void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst);
+
+ void ggml_sycl_op_l2_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst);

  #endif // GGML_SYCL_NORM_HPP
ggml/src/ggml-sycl/rope.cpp CHANGED
@@ -192,18 +192,15 @@ static void rope_neox_sycl(
  }
  }

- void ggml_sycl_op_rope(
- ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst,
- const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) {
- const ggml_tensor * src2 = dst->src[2];

- GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16);
  GGML_ASSERT( dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
- GGML_ASSERT(src0->type == dst->type);

- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t nr = ggml_nrows(src0);

  //const int n_past = ((int32_t *) dst->op_params)[0];
  const int n_dims = ((int32_t *) dst->op_params)[1];
@@ -228,49 +225,47 @@ void ggml_sycl_op_rope(

  const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;

- const int32_t * pos = (const int32_t *) src1_dd;

  const float * freq_factors = nullptr;
- if (src2 != nullptr) {
- freq_factors = (const float *) src2->data;
  }

  rope_corr_dims corr_dims;
  ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims.v);

  // compute
  if (is_neox) {
- if (src0->type == GGML_TYPE_F32) {
  rope_neox_sycl(
- (const float *)src0_dd, (float *)dst_dd, ne00, n_dims, nr, pos, freq_scale, ne01, freq_base, ext_factor,
  attn_factor, corr_dims, freq_factors, main_stream
  );
- } else if (src0->type == GGML_TYPE_F16) {
  rope_neox_sycl(
- (const sycl::half *)src0_dd, (sycl::half *)dst_dd, ne00, n_dims, nr, pos, freq_scale, ne01, freq_base, ext_factor,
  attn_factor, corr_dims, freq_factors, main_stream
  );
  } else {
  GGML_ABORT("fatal error");
  }
  } else {
- if (src0->type == GGML_TYPE_F32) {
  rope_norm_sycl(
- (const float *)src0_dd, (float *)dst_dd, ne00, n_dims, nr, pos, freq_scale, ne01, freq_base, ext_factor,
  attn_factor, corr_dims, freq_factors, main_stream
  );
- } else if (src0->type == GGML_TYPE_F16) {
  rope_norm_sycl(
- (const sycl::half *)src0_dd, (sycl::half *)dst_dd, ne00, n_dims, nr, pos, freq_scale, ne01, freq_base, ext_factor,
  attn_factor, corr_dims, freq_factors, main_stream
  );
  } else {
  GGML_ABORT("fatal error");
  }
  }
-
- GGML_UNUSED(src1);
- GGML_UNUSED(dst);
- GGML_UNUSED(src1_dd);
- GGML_UNUSED(ctx);
  }

  }
  }

+ void ggml_sycl_op_rope(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {

+ GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16);
  GGML_ASSERT( dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
+ GGML_ASSERT(dst->src[0]->type == dst->type);

+ const int64_t ne00 = dst->src[0]->ne[0];
+ const int64_t ne01 = dst->src[0]->ne[1];
+ const int64_t nr = ggml_nrows(dst->src[0]);

  //const int n_past = ((int32_t *) dst->op_params)[0];
  const int n_dims = ((int32_t *) dst->op_params)[1];

  const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;

+ const int32_t * pos = (const int32_t *) dst->src[1]->data;

  const float * freq_factors = nullptr;
+ if (dst->src[2] != nullptr) {
+ freq_factors = (const float *) dst->src[2]->data;
  }

  rope_corr_dims corr_dims;
  ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims.v);

+ dpct::queue_ptr main_stream = ctx.stream();
+ SYCL_CHECK(ggml_sycl_set_device(ctx.device));
+
  // compute
  if (is_neox) {
+ if (dst->src[0]->type == GGML_TYPE_F32) {
  rope_neox_sycl(
+ (const float *)dst->src[0]->data, (float *)dst->data, ne00, n_dims, nr, pos, freq_scale, ne01, freq_base, ext_factor,
  attn_factor, corr_dims, freq_factors, main_stream
  );
+ } else if (dst->src[0]->type == GGML_TYPE_F16) {
  rope_neox_sycl(
+ (const sycl::half *)dst->src[0]->data, (sycl::half *)dst->data, ne00, n_dims, nr, pos, freq_scale, ne01, freq_base, ext_factor,
  attn_factor, corr_dims, freq_factors, main_stream
  );
  } else {
  GGML_ABORT("fatal error");
  }
  } else {
+ if (dst->src[0]->type == GGML_TYPE_F32) {
  rope_norm_sycl(
+ (const float *)dst->src[0]->data, (float *)dst->data, ne00, n_dims, nr, pos, freq_scale, ne01, freq_base, ext_factor,
  attn_factor, corr_dims, freq_factors, main_stream
  );
+ } else if (dst->src[0]->type == GGML_TYPE_F16) {
  rope_norm_sycl(
+ (const sycl::half *)dst->src[0]->data, (sycl::half *)dst->data, ne00, n_dims, nr, pos, freq_scale, ne01, freq_base, ext_factor,
  attn_factor, corr_dims, freq_factors, main_stream
  );
  } else {
  GGML_ABORT("fatal error");
  }
  }
  }
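
Note: ggml_sycl_op_rope no longer receives the positions through a src1_dd pointer; everything is located through dst, and the F32/F16 kernel instantiation is selected from dst->src[0]->type. A condensed sketch of how the inputs are found after this change:

    const ggml_tensor * src0 = dst->src[0];                        // activations, F32 or F16
    const int32_t * pos = (const int32_t *) dst->src[1]->data;     // token positions
    const float * freq_factors = dst->src[2] != nullptr
                               ? (const float *) dst->src[2]->data // optional frequency factors
                               : nullptr;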
ggml/src/ggml-sycl/rope.hpp CHANGED
@@ -15,8 +15,6 @@

  #include "common.hpp"

- void ggml_sycl_op_rope(
- ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst,
- const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream);

  #endif // GGML_SYCL_ROPE_HPP

  #include "common.hpp"

+ void ggml_sycl_op_rope(ggml_backend_sycl_context & ctx, ggml_tensor *dst);

  #endif // GGML_SYCL_ROPE_HPP
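
Taken together, the declarations touched by this commit now share a single signature; collected here for reference, the headers reduce to the following form:

    void ggml_sycl_op_im2col    (ggml_backend_sycl_context & ctx, ggml_tensor * dst);
    void ggml_sycl_op_norm      (ggml_backend_sycl_context & ctx, ggml_tensor * dst);
    void ggml_sycl_op_rms_norm  (ggml_backend_sycl_context & ctx, ggml_tensor * dst);
    void ggml_sycl_op_group_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
    void ggml_sycl_op_l2_norm   (ggml_backend_sycl_context & ctx, ggml_tensor * dst);
    void ggml_sycl_op_rope      (ggml_backend_sycl_context & ctx, ggml_tensor * dst);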