@@ -325,7 +325,7 @@ EncodedResults StatefulLLMPipeline::generate(
 
     // Tail of previous output in chat mode is missing in KV cache.
     if (is_chat_conversation && m_chat_input_type == ov::genai::utils::GenerationChatInputsType::ENCODED_INPUTS) {
-        ov::Tensor new_chat_tokens = ov::Tensor{ov::element::i64, {1, m_tokenized_chat_history.size()}, const_cast<int64_t*>(m_tokenized_chat_history.data())};
+        ov::Tensor new_chat_tokens = ov::Tensor{ov::element::i64, {1, m_tokenized_chat_history.size()}, m_tokenized_chat_history.data()};
        ov::genai::align_kv_cache_and_history(new_chat_tokens, m_kv_cache_state);

        auto encoded_input = get_chat_encoded_input(new_chat_tokens, m_kv_cache_state);
@@ -414,7 +414,7 @@ EncodedResults StatefulLLMPipeline::generate(
        tokenized_chat_hist.reserve(state.size() + input_ids.get_size());
        std::copy(state.begin(), state.end(), std::back_inserter(tokenized_chat_hist));
        std::copy(input_ids.data<int64_t>(), input_ids.data<int64_t>() + input_ids.get_size(), std::back_inserter(tokenized_chat_hist));
-        sequence_group = std::make_shared<SequenceGroup>(request_id, ov::Tensor(ov::element::i64, {1, tokenized_chat_hist.size()}, const_cast<int64_t*>(tokenized_chat_hist.data())), config, block_size);
+        sequence_group = std::make_shared<SequenceGroup>(request_id, ov::Tensor(ov::element::i64, {1, tokenized_chat_hist.size()}, tokenized_chat_hist.data()), config, block_size);
    } else {
        size_t seq_len = input_ids.get_shape().at(1);
        size_t batch_offset = request_id * seq_len;
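
Context on why the `const_cast` can be dropped in both hunks: the wrapping `ov::Tensor` constructor takes a mutable `void*` host pointer, and `data()` on a non-const `std::vector<int64_t>` already returns `int64_t*`, which converts to `void*` implicitly. A minimal standalone sketch, assuming the OpenVINO runtime headers are available (the vector contents here are made up for illustration; `ov::Tensor`, `ov::element::i64`, and the `(type, shape, host_ptr)` constructor are the real OpenVINO APIs):

```cpp
#include <cstdint>
#include <vector>

#include <openvino/runtime/tensor.hpp>

int main() {
    // Non-const vector: data() returns int64_t*, which converts to the
    // constructor's void* host pointer directly, so no const_cast is needed.
    std::vector<int64_t> history = {1, 2, 3, 4};
    ov::Tensor tokens{ov::element::i64, {1, history.size()}, history.data()};

    // const_cast is only required when the source container itself is const:
    // data() then returns const int64_t*, while the wrapping Tensor
    // constructor expects a mutable pointer.
    const std::vector<int64_t>& const_history = history;
    ov::Tensor tokens_from_const{ov::element::i64, {1, const_history.size()},
                                 const_cast<int64_t*>(const_history.data())};
    return 0;
}
```

Note that a tensor built this way does not own the buffer, so the vector must outlive it; both call sites in the diff satisfy this because the tensor is consumed before the history vector goes out of scope.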