Commit 7fe7f74

committed: change max_new_tokens to max_tokens to follow the webui token adjustment

1 parent: 91b3808

File tree

2 files changed: +9 -13 lines

src/embeddedllm/backend/openvino_engine.py

Lines changed: 9 additions & 2 deletions

@@ -1,7 +1,6 @@
 import contextlib
 from io import BytesIO
 import time
-import requests
 import os
 from PIL import Image
 from pathlib import Path

@@ -140,6 +139,14 @@ async def generate_vision(
         assert "image" in mime_type

         image = Image.open(BytesIO(file_data))
+        input_token_length = self.processor.calc_num_image_tokens(image)[0]
+        max_tokens = sampling_params.max_tokens
+
+        assert input_token_length is not None
+
+        if input_token_length + max_tokens > self.max_model_len:
+            raise ValueError("Exceed Context Length")
+

         messages = [
             {'role': 'user', 'content': f'<|image_1|>\n{prompt_text}'}

@@ -163,7 +170,7 @@ async def generate_vision(

         try:
             generation_options = {
-                'max_new_tokens': sampling_params.max_new_tokens,
+                'max_new_tokens': max_tokens,
                 'do_sample': False,
             }
             token_list = self.model.generate(
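
The guard added above computes the image's prompt-token footprint and rejects the request before generation starts when it cannot fit in the context window alongside the requested completion. A minimal standalone sketch of the same check, assuming the names from the diff (calc_num_image_tokens, max_model_len, and the error message); the wrapper function itself is hypothetical:

from typing import Optional

def check_vision_context(
    num_image_tokens: Optional[int],
    max_tokens: int,
    max_model_len: int,
) -> None:
    # num_image_tokens mirrors processor.calc_num_image_tokens(image)[0],
    # max_tokens mirrors sampling_params.max_tokens, and max_model_len is
    # the engine's context size; this helper is illustrative only.
    assert num_image_tokens is not None
    if num_image_tokens + max_tokens > max_model_len:
        raise ValueError("Exceed Context Length")

# The image tokens and the completion budget must fit in the context together:
check_vision_context(num_image_tokens=1900, max_tokens=100, max_model_len=4096)  # passes
try:
    check_vision_context(num_image_tokens=1921, max_tokens=512, max_model_len=2048)
except ValueError as e:
    print(e)  # Exceed Context Length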

src/embeddedllm/sampling_params.py

Lines changed: 0 additions & 11 deletions

@@ -87,7 +87,6 @@ class SamplingParams:
         ignore_eos: Whether to ignore the EOS token and continue generating
             tokens after the EOS token is generated.
         max_tokens: Maximum number of tokens to generate per output sequence.
-        max_new_tokens: Maximum number of new tokens to generate per output sequence. (similar to max_tokens)
         min_tokens: Minimum number of tokens to generate per output sequence
             before EOS or stop_token_ids can be generated
         logprobs: Number of log probabilities to return per output token.

@@ -129,7 +128,6 @@ def __init__(
         include_stop_str_in_output: bool = False,
         ignore_eos: bool = False,
         max_tokens: Optional[int] = 16,
-        max_new_tokens: Optional[int] = 16,
         min_tokens: int = 0,
         logprobs: Optional[int] = None,
         prompt_logprobs: Optional[int] = None,

@@ -167,7 +165,6 @@ def __init__(
         self.stop_token_ids = list(stop_token_ids)
         self.ignore_eos = ignore_eos
         self.max_tokens = max_tokens
-        self.max_new_tokens = max_new_tokens
         self.min_tokens = min_tokens
         self.logprobs = logprobs
         self.prompt_logprobs = prompt_logprobs

@@ -235,8 +232,6 @@ def _verify_args(self) -> None:
             raise ValueError("min_p must be in [0, 1], got " f"{self.min_p}.")
         if self.max_tokens is not None and self.max_tokens < 1:
             raise ValueError(f"max_tokens must be at least 1, got {self.max_tokens}.")
-        if self.max_new_tokens is not None and self.max_new_tokens < 1:
-            raise ValueError(f"max_new_tokens must be at least 1, got {self.max_new_tokens}.")
         if self.min_tokens < 0:
             raise ValueError(
                 f"min_tokens must be greater than or equal to 0, " f"got {self.min_tokens}."

@@ -246,11 +241,6 @@ def _verify_args(self) -> None:
                 f"min_tokens must be less than or equal to "
                 f"max_tokens={self.max_tokens}, got {self.min_tokens}."
             )
-        if self.max_new_tokens is not None and self.min_tokens > self.max_new_tokens:
-            raise ValueError(
-                f"min_tokens must be less than or equal to "
-                f"max_new_tokens={self.max_new_tokens}, got {self.min_tokens}."
-            )
         if self.logprobs is not None and self.logprobs < 0:
             raise ValueError(f"logprobs must be non-negative, got {self.logprobs}.")
         if self.prompt_logprobs is not None and self.prompt_logprobs < 0:

@@ -358,7 +348,6 @@ def __repr__(self) -> str:
             f"include_stop_str_in_output={self.include_stop_str_in_output}, "
             f"ignore_eos={self.ignore_eos}, "
             f"max_tokens={self.max_tokens}, "
-            f"max_new_tokens={self.max_new_tokens}, "
             f"min_tokens={self.min_tokens}, "
             f"logprobs={self.logprobs}, "
             f"prompt_logprobs={self.prompt_logprobs}, "
