@@ -5428,7 +5428,8 @@ static void ggml_vk_print_tensor(ggml_backend_vk_context * ctx, const ggml_tensor * tensor, const char * name) {
 
         ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
 
-        ggml_vk_buffer_read(ctx, extra->buffer_gpu, extra->offset, tensor_data, tensor_size);
+        vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+        ggml_vk_buffer_read(ctx, buffer_gpu, extra->offset, tensor_data, tensor_size);
     }
 
     std::cerr << "TENSOR CHECK " << name << " (" << tensor->name << "): " << ggml_op_name(tensor->op) << std::endl;
@@ -5540,7 +5541,8 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_params * params, ggml_tensor * tensor) {
                 for (int i3 = 0; i3 < src0->ne[3]; i3++) {
                     for (int i2 = 0; i2 < src0->ne[2]; i2++) {
                         const int idx = i3*src0->ne[2] + i2;
-                        ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset + idx * src0->nb[2], ((char *)src0_clone->data + idx * src0_clone->nb[2]), src0->ne[1] * src0->nb[1]);
+                        vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+                        ggml_vk_buffer_read(ctx, buffer_gpu, offset + idx * src0->nb[2], ((char *)src0_clone->data + idx * src0_clone->nb[2]), src0->ne[1] * src0->nb[1]);
                     }
                 }
 
@@ -5550,10 +5552,11 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_params * params, ggml_tensor * tensor) {
                     src0_clone->nb[i] = src0_clone->nb[i - 1]*src0_clone->ne[i - 1];
                 }
             } else {
-                if (offset + src0_size >= extra->buffer_gpu->size) {
-                    src0_size = extra->buffer_gpu->size - offset;
+                vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+                if (offset + src0_size >= buffer_gpu->size) {
+                    src0_size = buffer_gpu->size - offset;
                 }
-                ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset, src0_clone->data, src0_size);
+                ggml_vk_buffer_read(ctx, buffer_gpu, offset, src0_clone->data, src0_size);
                 memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
             }
         } else {
@@ -5583,7 +5586,8 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_params * params, ggml_tensor * tensor) {
                 for (int i3 = 0; i3 < src1->ne[3]; i3++) {
                     for (int i2 = 0; i2 < src1->ne[2]; i2++) {
                         const int idx = i3*src1->ne[2] + i2;
-                        ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset + idx * src1->nb[2], ((char *)src1_clone->data + idx * src1_clone->nb[2]), src1->ne[1] * src1->nb[1]);
+                        vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+                        ggml_vk_buffer_read(ctx, buffer_gpu, offset + idx * src1->nb[2], ((char *)src1_clone->data + idx * src1_clone->nb[2]), src1->ne[1] * src1->nb[1]);
                     }
                 }
 
@@ -5593,10 +5597,11 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_params * params, ggml_tensor * tensor) {
                     src1_clone->nb[i] = src1_clone->nb[i - 1]*src1_clone->ne[i - 1];
                 }
             } else {
-                if (offset + src1_size >= extra->buffer_gpu->size) {
-                    src1_size = extra->buffer_gpu->size - offset;
+                vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+                if (offset + src1_size >= buffer_gpu->size) {
+                    src1_size = buffer_gpu->size - offset;
                 }
-                ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset, src1_clone->data, src1_size);
+                ggml_vk_buffer_read(ctx, buffer_gpu, offset, src1_clone->data, src1_size);
                 memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
             }
         } else {
@@ -5643,11 +5648,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_params * params, ggml_tensor * tensor) {
     } else if (tensor->op == GGML_OP_RMS_NORM) {
         tensor_clone = ggml_rms_norm(ggml_ctx, src0_clone, *(float *)tensor->op_params);
     } else if (tensor->op == GGML_OP_SOFT_MAX) {
-        if (src1 != nullptr) {
-            tensor_clone = ggml_soft_max_ext(ggml_ctx, src0_clone, src1_clone, *(float *)tensor->op_params);
-        } else {
         tensor_clone = ggml_soft_max(ggml_ctx, src0_clone);
-        }
     } else if (tensor->op == GGML_OP_DIAG_MASK_INF) {
         tensor_clone = ggml_diag_mask_inf(ggml_ctx, src0_clone, *(float *)tensor->op_params);
     } else if (tensor->op == GGML_OP_ROPE) {
@@ -5753,11 +5754,12 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_compute_params * params, ggml_tensor * tensor) {
 
         ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
 
-        if (extra->offset + tensor_size >= extra->buffer_gpu->size) {
-            tensor_size = extra->buffer_gpu->size - (extra->offset);
+        vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+        if (extra->offset + tensor_size >= buffer_gpu->size) {
+            tensor_size = buffer_gpu->size - (extra->offset);
         }
 
-        ggml_vk_buffer_read(ctx, extra->buffer_gpu, extra->offset, tensor_data, tensor_size);
+        ggml_vk_buffer_read(ctx, buffer_gpu, extra->offset, tensor_data, tensor_size);
     }
 
     float first_error_result = -1.0f;
0 commit comments