Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 24b4b41

Browse files
tikikunhiento09
authored and committed
fix for edge case of exiting word
1 parent 2981f81 commit 24b4b41

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

cpp/tensorrt_llm/nitro/controllers/tensorrtllm.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,9 +119,9 @@ GenerationInput::TensorPtr tensorrtllm::getTensorSingleStopWordList(int stopToke
119119

120120
// Builds the ChatML stop-word tensor: a flat int32 vector is copied to GPU memory through
// gptSession's buffer manager and given the shape {1, 2, N}.
// This diff shows both versions of the body. The removed lines ("-") use 18 values with shape
// {1, 2, 9}; the added lines ("+") drop the leading 28789 value, use 16 values with shape
// {1, 2, 8}, and change the 7, 8, 9 entries to 6, 7, 8.
// NOTE(review): presumably the first N values are the stop-word token ids and the second N values
// are the cumulative end offsets of each stop word, padded with -1 (TensorRT-LLM stop-words
// layout) -- confirm against the runtime documentation.
GenerationInput::TensorPtr tensorrtllm::getTensorChatMLStopWordList()
121121
{
122-
std::vector<int32_t> stopWordsTokens = {28789, 28766, 321, 28730, 416, 28766, 28767, 2, 32000, 7, 8, 9, -1, -1, -1,
123-
-1, -1, -1}; // Extend with -1 for increased length
124-
return gptSession->getBufferManager().copyFrom(stopWordsTokens, ITensor::makeShape({1, 2, 9}), MemoryType::kGPU);
122+
std::vector<int32_t> stopWordsTokens = { 28766, 321, 28730, 416, 28766, 28767, 2, 32000, 6, 7, 8, -1, -1, -1,
123+
-1, -1}; // Extend with -1 for increased length
124+
return gptSession->getBufferManager().copyFrom(stopWordsTokens, ITensor::makeShape({1, 2, 8}), MemoryType::kGPU);
125125
}
126126

127127
GenerationInput tensorrtllm::createGenerationInput(std::vector<int32_t> inputIdsHost)
@@ -189,6 +189,7 @@ void inferenceThread(std::shared_ptr<inferenceState> inferState, std::vector<int
189189
// Valid prevPos, proceed with slicing the string from prevPos to the end
190190
std::string stringTok(text.begin() + inferState->prevPos, text.end());
191191
std::lock_guard<std::mutex> guard(inferState->queueMutex); // Protect access with a lock
192+
std::cout << stringTok << std::endl;
192193
inferState->textsToStream.push(stringTok);
193194
}
194195
else if (inferState->prevPos >= text.size())

0 commit comments

Comments
 (0)