Spaces:
Running
Running
R0CKSTAR
commited on
Commit
·
0fd6120
1
Parent(s):
9fb68a1
cuda : remove nrows_x in mul_mat_q_process_tile (llama/13325)
Browse files
ggml/src/ggml-cuda/mmq.cuh
CHANGED
|
@@ -2522,7 +2522,7 @@ template <ggml_type type, int mmq_x, int nwarps, bool need_check, bool fixup>
|
|
| 2522 |
static __device__ __forceinline__ void mul_mat_q_process_tile(
|
| 2523 |
const char * __restrict__ x, const int offset_x, const int * __restrict__ y,
|
| 2524 |
const int * __restrict__ ids_dst, float * __restrict__ dst, float * __restrict__ tmp_fixup,
|
| 2525 |
-
const int
|
| 2526 |
const int tile_x_max_i, const int tile_y_max_j, const int kb0_start, const int kb0_stop) {
|
| 2527 |
|
| 2528 |
constexpr int qk = ggml_cuda_type_traits<type>::qk;
|
|
@@ -2689,7 +2689,7 @@ static __global__ void mul_mat_q(
|
|
| 2689 |
|
| 2690 |
constexpr bool fixup = false;
|
| 2691 |
mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup>
|
| 2692 |
-
(x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup,
|
| 2693 |
tile_x_max_i, tile_y_max_j, 0, ncols_x/qk);
|
| 2694 |
return;
|
| 2695 |
}
|
|
@@ -2767,7 +2767,7 @@ static __global__ void mul_mat_q(
|
|
| 2767 |
|
| 2768 |
constexpr bool fixup = false; // All but (potentially) the last iterations write their data to dst rather than the fixup buffer.
|
| 2769 |
mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup>
|
| 2770 |
-
(x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup,
|
| 2771 |
tile_x_max_i, tile_y_max_j, kb0_start, kb0_stop);
|
| 2772 |
|
| 2773 |
kbc += blocks_per_ne00;
|
|
@@ -2834,7 +2834,7 @@ static __global__ void mul_mat_q(
|
|
| 2834 |
|
| 2835 |
constexpr bool fixup = true; // Last index writes its data to fixup buffer to avoid data races with other blocks.
|
| 2836 |
mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup>
|
| 2837 |
-
(x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup,
|
| 2838 |
tile_x_max_i, tile_y_max_j, kb0_start, kb0_stop);
|
| 2839 |
}
|
| 2840 |
|
|
|
|
| 2522 |
static __device__ __forceinline__ void mul_mat_q_process_tile(
|
| 2523 |
const char * __restrict__ x, const int offset_x, const int * __restrict__ y,
|
| 2524 |
const int * __restrict__ ids_dst, float * __restrict__ dst, float * __restrict__ tmp_fixup,
|
| 2525 |
+
const int stride_row_x, const int ncols_y, const int stride_col_dst,
|
| 2526 |
const int tile_x_max_i, const int tile_y_max_j, const int kb0_start, const int kb0_stop) {
|
| 2527 |
|
| 2528 |
constexpr int qk = ggml_cuda_type_traits<type>::qk;
|
|
|
|
| 2689 |
|
| 2690 |
constexpr bool fixup = false;
|
| 2691 |
mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup>
|
| 2692 |
+
(x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, stride_row_x, ncols_y, stride_col_dst,
|
| 2693 |
tile_x_max_i, tile_y_max_j, 0, ncols_x/qk);
|
| 2694 |
return;
|
| 2695 |
}
|
|
|
|
| 2767 |
|
| 2768 |
constexpr bool fixup = false; // All but (potentially) the last iterations write their data to dst rather than the fixup buffer.
|
| 2769 |
mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup>
|
| 2770 |
+
(x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, stride_row_x, ncols_y, stride_col_dst,
|
| 2771 |
tile_x_max_i, tile_y_max_j, kb0_start, kb0_stop);
|
| 2772 |
|
| 2773 |
kbc += blocks_per_ne00;
|
|
|
|
| 2834 |
|
| 2835 |
constexpr bool fixup = true; // Last index writes its data to fixup buffer to avoid data races with other blocks.
|
| 2836 |
mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup>
|
| 2837 |
+
(x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, stride_row_x, ncols_y, stride_col_dst,
|
| 2838 |
tile_x_max_i, tile_y_max_j, kb0_start, kb0_stop);
|
| 2839 |
}
|
| 2840 |
|