We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 647b960; commit e14e842. Copy full SHA for e14e842
ggml/src/ggml-cuda/mmq.cuh
@@ -3494,7 +3494,7 @@ static __global__ void mul_mat_q_stream_k_fixup(
3494
const int col_diff = col_high - col_low;
3495
3496
for (int j = threadIdx.y*warp_size + threadIdx.x; j < mmq_x; j += nwarps*warp_size) {
3497
- ids_dst_shared[j] = ids_dst[col_low + j];
+ ids_dst_shared[j] = ids_dst[col_low + jt*mmq_x + j];
3498
}
3499
__syncthreads();
3500
0 commit comments