Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit f125560

Browse files
tikikunhiento09
authored and committed
better default value
1 parent f0482cc commit f125560

File tree

2 files changed

+3
-4
lines changed

2 files changed

+3
-4
lines changed

cpp/tensorrt_llm/nitro/controllers/tensorrtllm.cc

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -256,7 +256,6 @@ void tensorrtllm::chat_completion(
256256
samplingConfig.topP = std::vector{completion.top_p};
257257
samplingConfig.minLength = std::vector{outputLen};
258258
samplingConfig.repetitionPenalty = std::vector{completion.frequency_penalty};
259-
260259
// Input preparation
261260

262261
std::thread infThread(inferenceThread, inferState, inputIdsHost, callback, this,samplingConfig,inputLen,outputLen);

cpp/tensorrt_llm/nitro/models/chat_completion_request.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,11 @@ inline inferences::ChatCompletionRequest fromRequest(const HttpRequest& req) {
2121
inferences::ChatCompletionRequest completion;
2222
if (jsonBody) {
2323
completion.stream = (*jsonBody).get("stream", false).asBool();
24-
completion.max_tokens = (*jsonBody).get("max_tokens", 500).asInt();
24+
completion.max_tokens = (*jsonBody).get("max_tokens", 2048).asInt();
2525
completion.top_p = (*jsonBody).get("top_p", 0.95).asFloat();
26-
completion.temperature = (*jsonBody).get("temperature", 0.8).asFloat();
26+
completion.temperature = (*jsonBody).get("temperature", 0.0f).asFloat();
2727
completion.frequency_penalty =
28-
(*jsonBody).get("frequency_penalty", 0).asFloat();
28+
(*jsonBody).get("frequency_penalty", 1.3).asFloat();
2929
completion.presence_penalty =
3030
(*jsonBody).get("presence_penalty", 0).asFloat();
3131
completion.messages = (*jsonBody)["messages"];

0 commit comments

Comments
 (0)