Skip to content

Commit 65b9356

Browse files
authored
Fix const in phi3 qwen and minicpm (#3072)
Fix `const_cast` usage in phi3, qwen and minicpm
1 parent eb64c87 commit 65b9356

File tree

16 files changed

+82
-65
lines changed

16 files changed

+82
-65
lines changed

src/cpp/src/image_generation/models/autoencoder_kl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ class DiagonalGaussianDistribution {
3232
OPENVINO_ASSERT(shape[0] == 1, "Batch size must be 1");
3333
shape[1] /= 2;
3434

35-
m_mean = ov::Tensor(parameters.get_element_type(), shape, const_cast<void*>(parameters.data()));
35+
m_mean = ov::Tensor(parameters.get_element_type(), shape, parameters.data());
3636
m_std = ov::Tensor(m_mean.get_element_type(), shape);
3737
ov::Tensor logvar(parameters.get_element_type(), shape, m_mean.data<float>() + m_mean.get_size());
3838

src/cpp/src/llm/pipeline_stateful.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,7 @@ EncodedResults StatefulLLMPipeline::generate(
325325

326326
// Tail of previous output in chat mode is missing in KV cache.
327327
if (is_chat_conversation && m_chat_input_type == ov::genai::utils::GenerationChatInputsType::ENCODED_INPUTS) {
328-
ov::Tensor new_chat_tokens = ov::Tensor{ov::element::i64, {1, m_tokenized_chat_history.size()}, const_cast<int64_t*>(m_tokenized_chat_history.data())};
328+
ov::Tensor new_chat_tokens = ov::Tensor{ov::element::i64, {1, m_tokenized_chat_history.size()}, m_tokenized_chat_history.data()};
329329
ov::genai::align_kv_cache_and_history(new_chat_tokens, m_kv_cache_state);
330330

331331
auto encoded_input = get_chat_encoded_input(new_chat_tokens, m_kv_cache_state);
@@ -414,7 +414,7 @@ EncodedResults StatefulLLMPipeline::generate(
414414
tokenized_chat_hist.reserve(state.size() + input_ids.get_size());
415415
std::copy(state.begin(), state.end(), std::back_inserter(tokenized_chat_hist));
416416
std::copy(input_ids.data<int64_t>(), input_ids.data<int64_t>() + input_ids.get_size(), std::back_inserter(tokenized_chat_hist));
417-
sequence_group = std::make_shared<SequenceGroup>(request_id, ov::Tensor(ov::element::i64, {1, tokenized_chat_hist.size()}, const_cast<int64_t*>(tokenized_chat_hist.data())), config, block_size);
417+
sequence_group = std::make_shared<SequenceGroup>(request_id, ov::Tensor(ov::element::i64, {1, tokenized_chat_hist.size()}, tokenized_chat_hist.data()), config, block_size);
418418
} else {
419419
size_t seq_len = input_ids.get_shape().at(1);
420420
size_t batch_offset = request_id * seq_len;

src/cpp/src/llm/pipeline_static.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ EncodedResults StatefulLLMPipeline::generate(
336336
++position_ids_data;
337337
// However, attention_mask changes its shape on each iteration, it should be re-set explicitly
338338
attention_mask_data.push_back(1);
339-
m_request.set_tensor("attention_mask", ov::Tensor(ov::element::i64, ov::Shape{1,attention_mask_data.size()}, const_cast<int64_t*>(attention_mask_data.data())));
339+
m_request.set_tensor("attention_mask", ov::Tensor(ov::element::i64, ov::Shape{1,attention_mask_data.size()}, attention_mask_data.data()));
340340

341341
m_request.infer();
342342

src/cpp/src/lm_encoding.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ ov::genai::utils::GenerationFinishInfo get_lm_encoded_results(
259259
}
260260
}
261261

262-
m_llm.set_tensor("beam_idx", ov::Tensor{ov::element::i32, {total_num_tokens}, const_cast<int32_t*>(next_beams.data())});
262+
m_llm.set_tensor("beam_idx", ov::Tensor{ov::element::i32, {total_num_tokens}, next_beams.data()});
263263

264264
const auto infer_start = std::chrono::steady_clock::now();
265265
m_llm.start_async();

src/cpp/src/sequence_group.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,8 +353,9 @@ class SequenceGroup : public std::enable_shared_from_this<SequenceGroup> {
353353
using Ptr = std::shared_ptr<SequenceGroup>;
354354
using CPtr = std::shared_ptr<const SequenceGroup>;
355355

356+
// const_cast is safe as ov::Tensor only views the data and doesn't modify it.
356357
SequenceGroup(uint64_t request_id, const TokenIds& input_ids, const ov::genai::GenerationConfig& sampling_params, std::size_t block_size)
357-
: SequenceGroup(request_id, ov::Tensor(ov::element::i64, ov::Shape{input_ids.size()}, (void *)input_ids.data()), sampling_params, block_size, std::nullopt) {
358+
: SequenceGroup(request_id, ov::Tensor(ov::element::i64, ov::Shape{input_ids.size()}, const_cast<int64_t*>(input_ids.data())), sampling_params, block_size, std::nullopt) {
358359
}
359360

360361
SequenceGroup(uint64_t request_id,

src/cpp/src/speculative_decoding/speculative_decoding_stateful.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ int64_t LLMInferWrapper::infer_next(int64_t token, bool append_perf_stat) {
191191
++m_new_position_id;
192192
// However, attention_mask changes its shape on each iteration, it should be re-set explicitly
193193
m_new_atten_mask_data.push_back(1);
194-
m_request.set_tensor("attention_mask", ov::Tensor(ov::element::i64, ov::Shape{1,m_new_atten_mask_data.size()}, const_cast<int64_t*>(m_new_atten_mask_data.data())));
194+
m_request.set_tensor("attention_mask", ov::Tensor(ov::element::i64, ov::Shape{1,m_new_atten_mask_data.size()}, m_new_atten_mask_data.data()));
195195

196196
const auto infer_start = std::chrono::steady_clock::now();
197197
m_request.infer();

src/cpp/src/speech_generation/speecht5_tts_model.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,8 @@ Text2SpeechDecodedResults SpeechT5TTSImpl::generate(const std::vector<std::strin
155155
// prepare inputs for decoder
156156
std::vector<float> zeros(bsz * 1 * m_num_mel_bins, 0.0f);
157157
std::vector<float> empty_spectrogram;
158-
ov::Tensor inputs_embeds(ov::element::f32, ov::Shape{bsz, 1, m_num_mel_bins}, const_cast<float*>(zeros.data()));
159-
ov::Tensor spectrogram(ov::element::f32, ov::Shape{0, bsz, 2, m_num_mel_bins}, const_cast<float*>(empty_spectrogram.data()));
158+
ov::Tensor inputs_embeds(ov::element::f32, ov::Shape{bsz, 1, m_num_mel_bins}, zeros.data());
159+
ov::Tensor spectrogram(ov::element::f32, ov::Shape{0, bsz, 2, m_num_mel_bins}, empty_spectrogram.data());
160160

161161
int64_t iter = 0;
162162
// decoder loop

src/cpp/src/visual_language/minicpm/classes.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,12 @@ ResampledImage VisionEncoderMiniCPM::resample_encoded_image(const EncodedImage&
455455
for (size_t ja = 0; ja < slices_shape.at(1); ++ja) {
456456
size_t d2 = slices_shape.at(2);
457457
size_t d3 = slices_shape.at(3);
458-
ov::Tensor encoded_view{ov::element::f32, {1, d2, d3}, slices.data<float>() + (i * slices_shape.at(1) + ja) * d2 * d3};
458+
// const_cast is safe as ov::Tensor only views the data and doesn't modify it.
459+
ov::Tensor encoded_view{
460+
ov::element::f32,
461+
{1, d2, d3},
462+
const_cast<float*>(slices.data<float>()) + (i * slices_shape.at(1) + ja) * d2 * d3
463+
};
459464
vision_embeds[ja] = resample(encoded_view, target_size, pad_to_max);
460465
}
461466
vision_embed_tensors[i] = vision_embeds;

src/cpp/src/visual_language/phi3_vision/classes.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -661,11 +661,12 @@ std::vector<std::variant<ov::Tensor, size_t>> drop_image_placeholders(const ov::
661661
text_start = offset;
662662
chunks.push_back(size_t(-(last_token + 1)));
663663
} else if (last_token >= 0 && next_token < 0) {
664+
// const_cast is safe as ov::Tensor only views the data and doesn't modify it.
664665
chunks.emplace_back(
665666
std::in_place_type<ov::Tensor>,
666667
ov::element::i64,
667668
ov::Shape{1, offset - text_start},
668-
tokens.data<int64_t>() + text_start
669+
const_cast<int64_t*>(tokens.data<int64_t>()) + text_start
669670
);
670671
} else if (last_token < 0 && next_token < 0 && last_token != next_token) {
671672
chunks.push_back(size_t(-(last_token + 1)));
@@ -675,11 +676,12 @@ std::vector<std::variant<ov::Tensor, size_t>> drop_image_placeholders(const ov::
675676
// Add the last chunk
676677
size_t full_length = tokens.get_shape().at(1);
677678
if (last_token >= 0) {
679+
// const_cast is safe as ov::Tensor only views the data and doesn't modify it.
678680
chunks.emplace_back(
679681
std::in_place_type<ov::Tensor>,
680682
ov::element::i64,
681683
ov::Shape{1, full_length - text_start},
682-
tokens.data<int64_t>() + text_start
684+
const_cast<int64_t*>(tokens.data<int64_t>()) + text_start
683685
);
684686
} else {
685687
chunks.push_back(size_t(-(last_token + 1)));

src/cpp/src/visual_language/phi4mm/classes.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -728,6 +728,7 @@ EncodedImage VisionEncoderPhi4MM::encode(const ov::Tensor& image, const ov::AnyM
728728
{
729729
ov::Tensor height{ov::element::i32, {}};
730730
ov::Tensor width{ov::element::i32, {}};
731+
// const_cast is safe as ov::Tensor only views the data and doesn't modify it.
731732
ov::Tensor sub_GN{ov::element::f32, {1, 1, 1, m_vlm_config.sub_GN.size()}, const_cast<float*>(m_vlm_config.sub_GN.data())};
732733
ov::Tensor glb_GN{ov::element::f32, {1, 1, m_vlm_config.glb_GN.size()}, const_cast<float*>(m_vlm_config.glb_GN.data())};
733734
height.data<int32_t>()[0] = image_height;

0 commit comments

Comments
 (0)