Skip to content

Commit e14e842

Browse files
CUDA: fix MMQ stream-k fixup ne1 indices (#17089)
1 parent 647b960 commit e14e842

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

ggml/src/ggml-cuda/mmq.cuh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3494,7 +3494,7 @@ static __global__ void mul_mat_q_stream_k_fixup(
3494 3494
const int col_diff = col_high - col_low;
3495 3495

3496 3496
for (int j = threadIdx.y*warp_size + threadIdx.x; j < mmq_x; j += nwarps*warp_size) {
3497-
ids_dst_shared[j] = ids_dst[col_low + j];
3497+
ids_dst_shared[j] = ids_dst[col_low + jt*mmq_x + j];
3498 3498
}
3499 3499
__syncthreads();
3500 3500

0 commit comments

Comments
 (0)