Skip to content

Commit eb64c87

Browse files
authored
Const-related fixes on tensor creation (#3026)
Fixes const-related casting issues in tests and code.
1 parent 1d5d3b8 commit eb64c87

File tree

13 files changed

+63
-62
lines changed

13 files changed

+63
-62
lines changed

src/cpp/src/image_generation/models/autoencoder_kl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ class DiagonalGaussianDistribution {
3232
OPENVINO_ASSERT(shape[0] == 1, "Batch size must be 1");
3333
shape[1] /= 2;
3434

35-
m_mean = ov::Tensor(parameters.get_element_type(), shape, parameters.data());
35+
m_mean = ov::Tensor(parameters.get_element_type(), shape, const_cast<void*>(parameters.data()));
3636
m_std = ov::Tensor(m_mean.get_element_type(), shape);
3737
ov::Tensor logvar(parameters.get_element_type(), shape, m_mean.data<float>() + m_mean.get_size());
3838

src/cpp/src/llm/pipeline_stateful.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,7 @@ EncodedResults StatefulLLMPipeline::generate(
325325

326326
// Tail of previous output in chat mode is missing in KV cache.
327327
if (is_chat_conversation && m_chat_input_type == ov::genai::utils::GenerationChatInputsType::ENCODED_INPUTS) {
328-
ov::Tensor new_chat_tokens = ov::Tensor{ov::element::i64, {1, m_tokenized_chat_history.size()}, m_tokenized_chat_history.data()};
328+
ov::Tensor new_chat_tokens = ov::Tensor{ov::element::i64, {1, m_tokenized_chat_history.size()}, const_cast<int64_t*>(m_tokenized_chat_history.data())};
329329
ov::genai::align_kv_cache_and_history(new_chat_tokens, m_kv_cache_state);
330330

331331
auto encoded_input = get_chat_encoded_input(new_chat_tokens, m_kv_cache_state);
@@ -414,7 +414,7 @@ EncodedResults StatefulLLMPipeline::generate(
414414
tokenized_chat_hist.reserve(state.size() + input_ids.get_size());
415415
std::copy(state.begin(), state.end(), std::back_inserter(tokenized_chat_hist));
416416
std::copy(input_ids.data<int64_t>(), input_ids.data<int64_t>() + input_ids.get_size(), std::back_inserter(tokenized_chat_hist));
417-
sequence_group = std::make_shared<SequenceGroup>(request_id, ov::Tensor(ov::element::i64, {1, tokenized_chat_hist.size()}, tokenized_chat_hist.data()), config, block_size);
417+
sequence_group = std::make_shared<SequenceGroup>(request_id, ov::Tensor(ov::element::i64, {1, tokenized_chat_hist.size()}, const_cast<int64_t*>(tokenized_chat_hist.data())), config, block_size);
418418
} else {
419419
size_t seq_len = input_ids.get_shape().at(1);
420420
size_t batch_offset = request_id * seq_len;

src/cpp/src/llm/pipeline_static.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ EncodedResults StatefulLLMPipeline::generate(
336336
++position_ids_data;
337337
// However, attention_mask changes its shape on each iteration, it should be re-set explicitly
338338
attention_mask_data.push_back(1);
339-
m_request.set_tensor("attention_mask", ov::Tensor(ov::element::i64, ov::Shape{1,attention_mask_data.size()}, (void*)&attention_mask_data[0]));
339+
m_request.set_tensor("attention_mask", ov::Tensor(ov::element::i64, ov::Shape{1,attention_mask_data.size()}, const_cast<int64_t*>(attention_mask_data.data())));
340340

341341
m_request.infer();
342342

src/cpp/src/lm_encoding.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ ov::genai::utils::GenerationFinishInfo get_lm_encoded_results(
259259
}
260260
}
261261

262-
m_llm.set_tensor("beam_idx", ov::Tensor{ov::element::i32, {total_num_tokens}, next_beams.data()});
262+
m_llm.set_tensor("beam_idx", ov::Tensor{ov::element::i32, {total_num_tokens}, const_cast<int32_t*>(next_beams.data())});
263263

264264
const auto infer_start = std::chrono::steady_clock::now();
265265
m_llm.start_async();

src/cpp/src/speculative_decoding/speculative_decoding_stateful.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ int64_t LLMInferWrapper::infer_next(int64_t token, bool append_perf_stat) {
191191
++m_new_position_id;
192192
// However, attention_mask changes its shape on each iteration, it should be re-set explicitly
193193
m_new_atten_mask_data.push_back(1);
194-
m_request.set_tensor("attention_mask", ov::Tensor(ov::element::i64, ov::Shape{1,m_new_atten_mask_data.size()}, (void*)&m_new_atten_mask_data[0]));
194+
m_request.set_tensor("attention_mask", ov::Tensor(ov::element::i64, ov::Shape{1,m_new_atten_mask_data.size()}, const_cast<int64_t*>(m_new_atten_mask_data.data())));
195195

196196
const auto infer_start = std::chrono::steady_clock::now();
197197
m_request.infer();

src/cpp/src/speech_generation/speecht5_tts_model.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,9 @@ Text2SpeechDecodedResults SpeechT5TTSImpl::generate(const std::vector<std::strin
154154

155155
// prepare inputs for decoder
156156
std::vector<float> zeros(bsz * 1 * m_num_mel_bins, 0.0f);
157-
ov::Tensor inputs_embeds(ov::element::f32, ov::Shape{bsz, 1, m_num_mel_bins}, zeros.data());
158-
ov::Tensor spectrogram(ov::element::f32, ov::Shape{0, bsz, 2, m_num_mel_bins}, std::vector<float>{}.data());
157+
std::vector<float> empty_spectrogram;
158+
ov::Tensor inputs_embeds(ov::element::f32, ov::Shape{bsz, 1, m_num_mel_bins}, const_cast<float*>(zeros.data()));
159+
ov::Tensor spectrogram(ov::element::f32, ov::Shape{0, bsz, 2, m_num_mel_bins}, const_cast<float*>(empty_spectrogram.data()));
159160

160161
int64_t iter = 0;
161162
// decoder loop

src/cpp/src/visual_language/phi4mm/classes.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -728,8 +728,8 @@ EncodedImage VisionEncoderPhi4MM::encode(const ov::Tensor& image, const ov::AnyM
728728
{
729729
ov::Tensor height{ov::element::i32, {}};
730730
ov::Tensor width{ov::element::i32, {}};
731-
ov::Tensor sub_GN{ov::element::f32, {1, 1, 1, m_vlm_config.sub_GN.size()}, m_vlm_config.sub_GN.data()};
732-
ov::Tensor glb_GN{ov::element::f32, {1, 1, m_vlm_config.glb_GN.size()}, m_vlm_config.glb_GN.data()};
731+
ov::Tensor sub_GN{ov::element::f32, {1, 1, 1, m_vlm_config.sub_GN.size()}, const_cast<float*>(m_vlm_config.sub_GN.data())};
732+
ov::Tensor glb_GN{ov::element::f32, {1, 1, m_vlm_config.glb_GN.size()}, const_cast<float*>(m_vlm_config.glb_GN.data())};
733733
height.data<int32_t>()[0] = image_height;
734734
width.data<int32_t>()[0] = image_width;
735735
CircularBufferQueueElementGuard<ov::InferRequest> lock{m_separator_inserters.get()};

src/cpp/src/visual_language/qwen2vl/classes.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -807,7 +807,7 @@ void VisionEncoderQwen2VL::encode_with_imagepreprocess_ov(const std::vector<ov::
807807
const float VIDEO_BRANCH_CONDITION = 0.f;
808808
const float IMAGE_BRANCH_CONDITION = 1.f;
809809
std::vector<float> cond_img_vid_data{images.size() == 2u ? VIDEO_BRANCH_CONDITION : IMAGE_BRANCH_CONDITION};
810-
ov::Tensor cond_img_vid(ov::element::f32, ov::Shape{1}, cond_img_vid_data.data());
810+
ov::Tensor cond_img_vid(ov::element::f32, ov::Shape{1}, const_cast<float*>(cond_img_vid_data.data()));
811811
ov::Tensor input_image_1(ov::element::u8, image_shape, images[0].data<uint8_t>());
812812
ov::Tensor input_image_2(ov::element::u8,
813813
image_shape,

src/cpp/src/whisper/whisper.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ std::pair<ov::genai::EncodedResults, bool> decode(std::shared_ptr<ov::genai::Whi
7777
ov::Tensor beam_idx = decoder->create_host_tensor(ov::element::i32, {batch_size});
7878
std::fill_n(beam_idx.data<int32_t>(), batch_size, 0);
7979

80-
const ov::Tensor input_ids_tensor{ov::element::i64, {1, input_ids.size()}, (void*)input_ids.data()};
80+
const ov::Tensor input_ids_tensor{ov::element::i64, {1, input_ids.size()}, const_cast<int64_t*>(input_ids.data())};
8181

8282
const auto infer_start = std::chrono::steady_clock::now();
8383
decoder->start_async(encoder_hidden_state, input_ids_tensor, beam_idx);
@@ -202,7 +202,7 @@ ov::Tensor encode(ov::InferRequest& request,
202202
". Actual size: ",
203203
mel_data.size(),
204204
".");
205-
ov::Tensor input_tensor(ov::element::f32, {1, feature_size, nb_max_frames}, mel_data.data());
205+
ov::Tensor input_tensor(ov::element::f32, {1, feature_size, nb_max_frames}, const_cast<float*>(mel_data.data()));
206206

207207
request.set_tensor("input_features", input_tensor);
208208

tests/cpp/block_manager.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ TEST(TestBlockManager, general_test) {
1515
ov::genai::SequenceGroup::Ptr sequence_group = std::make_shared<ov::genai::SequenceGroup>(
1616
0,
1717
ov::Tensor(ov::element::i64, {
18-
prompt_ids.size()}, prompt_ids.data()),
18+
prompt_ids.size()}, const_cast<int64_t*>(prompt_ids.data())),
1919
ov::genai::beam_search(),
2020
4);
2121
auto sequence = sequence_group->get_not_finished_sequences()[0];
@@ -44,11 +44,11 @@ TEST(TestBlockManager, general_test) {
4444
TEST(TestBlockManager, required_blocks_count) {
4545
ov::genai::BlockManager bm = ov::genai::BlockManager(8, false, 4, 3);
4646

47-
std::vector<uint64_t> tokens = {0,1,2,3,4};
47+
std::vector<int64_t> tokens = {0,1,2,3,4};
4848
ov::genai::SequenceGroup::Ptr sequence_group = std::make_shared<ov::genai::SequenceGroup>(
4949
0,
5050
ov::Tensor(ov::element::i64, {
51-
tokens.size()}, tokens.data()),
51+
tokens.size()}, const_cast<int64_t*>(tokens.data())),
5252
ov::genai::beam_search(),
5353
4);
5454
sequence_group->schedule_tokens(5);
@@ -94,11 +94,11 @@ TEST(TestBlockManager, CanFreeBlocksFromSequence) {
9494
const size_t BLOCK_SIZE = 2;
9595
ov::genai::BlockManager bm = ov::genai::BlockManager(8, false, BLOCK_SIZE, 3);
9696

97-
std::vector<uint64_t> tokens = {0,1,2,3,4};
97+
std::vector<int64_t> tokens = {0,1,2,3,4};
9898
ov::genai::SequenceGroup::Ptr sequence_group = std::make_shared<ov::genai::SequenceGroup>(
9999
0,
100100
ov::Tensor(ov::element::i64, {
101-
tokens.size()}, tokens.data()),
101+
tokens.size()}, const_cast<int64_t*>(tokens.data())),
102102
ov::genai::beam_search(),
103103
BLOCK_SIZE);
104104
sequence_group->schedule_tokens(5);

0 commit comments

Comments
 (0)