ggerganov committed on
Commit
95f6502
·
unverified ·
1 Parent(s): 73c6598

cuda : fix dequantize kernel names (llama/4938)

Browse files
Files changed (1) hide show
  1. ggml-cuda.cu +6 -6
ggml-cuda.cu CHANGED
@@ -6309,14 +6309,14 @@ static void dequantize_row_q3_K_cuda(const void * vx, dst_t * y, const int k, cu
6309
  }
6310
 
6311
  template<typename dst_t>
6312
- static void dequantize_q4_0_cuda(const void * vx, dst_t * y, const int k, cudaStream_t stream) {
6313
  const int nb32 = k / 32;
6314
  const int nb = (k + 255) / 256;
6315
  dequantize_block_q4_0<<<nb, 32, 0, stream>>>(vx, y, nb32);
6316
  }
6317
 
6318
  template<typename dst_t>
6319
- static void dequantize_q4_1_cuda(const void * vx, dst_t * y, const int k, cudaStream_t stream) {
6320
  const int nb32 = k / 32;
6321
  const int nb = (k + 255) / 256;
6322
  dequantize_block_q4_1<<<nb, 32, 0, stream>>>(vx, y, nb32);
@@ -6370,9 +6370,9 @@ static to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type) {
6370
  int id;
6371
  switch (type) {
6372
  case GGML_TYPE_Q4_0:
6373
- return dequantize_q4_0_cuda;
6374
  case GGML_TYPE_Q4_1:
6375
- return dequantize_q4_1_cuda;
6376
  case GGML_TYPE_Q5_0:
6377
  return dequantize_block_cuda<QK5_0, QR5_0, dequantize_q5_0>;
6378
  case GGML_TYPE_Q5_1:
@@ -6407,9 +6407,9 @@ static to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type) {
6407
  static to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type) {
6408
  switch (type) {
6409
  case GGML_TYPE_Q4_0:
6410
- return dequantize_q4_0_cuda;
6411
  case GGML_TYPE_Q4_1:
6412
- return dequantize_q4_1_cuda;
6413
  case GGML_TYPE_Q5_0:
6414
  return dequantize_block_cuda<QK5_0, QR5_0, dequantize_q5_0>;
6415
  case GGML_TYPE_Q5_1:
 
6309
  }
6310
 
6311
  template<typename dst_t>
6312
+ static void dequantize_row_q4_0_cuda(const void * vx, dst_t * y, const int k, cudaStream_t stream) {
6313
  const int nb32 = k / 32;
6314
  const int nb = (k + 255) / 256;
6315
  dequantize_block_q4_0<<<nb, 32, 0, stream>>>(vx, y, nb32);
6316
  }
6317
 
6318
  template<typename dst_t>
6319
+ static void dequantize_row_q4_1_cuda(const void * vx, dst_t * y, const int k, cudaStream_t stream) {
6320
  const int nb32 = k / 32;
6321
  const int nb = (k + 255) / 256;
6322
  dequantize_block_q4_1<<<nb, 32, 0, stream>>>(vx, y, nb32);
 
6370
  int id;
6371
  switch (type) {
6372
  case GGML_TYPE_Q4_0:
6373
+ return dequantize_row_q4_0_cuda;
6374
  case GGML_TYPE_Q4_1:
6375
+ return dequantize_row_q4_1_cuda;
6376
  case GGML_TYPE_Q5_0:
6377
  return dequantize_block_cuda<QK5_0, QR5_0, dequantize_q5_0>;
6378
  case GGML_TYPE_Q5_1:
 
6407
  static to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type) {
6408
  switch (type) {
6409
  case GGML_TYPE_Q4_0:
6410
+ return dequantize_row_q4_0_cuda;
6411
  case GGML_TYPE_Q4_1:
6412
+ return dequantize_row_q4_1_cuda;
6413
  case GGML_TYPE_Q5_0:
6414
  return dequantize_block_cuda<QK5_0, QR5_0, dequantize_q5_0>;
6415
  case GGML_TYPE_Q5_1: