Skip to content

Commit 20f68f3

Browse files
committed
up
1 parent 5be08ac commit 20f68f3

File tree

3 files changed

+13
-11
lines changed

3 files changed

+13
-11
lines changed

fastdeploy/demo/offline_demo.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -17,11 +17,9 @@
1717
from fastdeploy.engine.sampling_params import SamplingParams
1818
from fastdeploy.entrypoints.llm import LLM
1919

20-
model_name_or_path = "/workspace/ERNIE-4.5-0.3B-Paddle"
21-
22-
# 超参设置
23-
sampling_params = SamplingParams(temperature=0.1, max_tokens=30, prompt_logprobs=100)
24-
llm = LLM(model=model_name_or_path, tensor_parallel_size=1, enable_prefix_caching=False)
25-
output = llm.generate(prompts="who are you?", use_tqdm=True, sampling_params=sampling_params)
20+
model_name_or_path = "PaddlePaddle/ERNIE-4.5-0.3B-Paddle"
21+
sampling_params = SamplingParams(temperature=0.1, max_tokens=30)
22+
llm = LLM(model=model_name_or_path)
23+
output = llm.generate(prompts="who are you?", use_tqdm=True, sampling_params=sampling_params)
2624

2725
print(output)

fastdeploy/engine/engine.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -325,6 +325,7 @@ def add_requests(self, task, sampling_params=None, **kwargs):
325325
raise EngineError(err_msg, error_code=400)
326326

327327
request.metrics.preprocess_end_time = time.time()
328+
request.metrics.scheduler_recv_req_time = time.time()
328329
self.engine.scheduler.put_requests([request])
329330
llm_logger.info(f"Cache task with request_id ({request.get('request_id')})")
330331
llm_logger.debug(f"cache task: {request}")

fastdeploy/engine/request.py

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -523,6 +523,8 @@ def record_recv_token(self, cur_time: float = None):
523523
cur_time = time.time() if cur_time is None else cur_time
524524
self.engine_recv_latest_token_time = cur_time
525525
self.llm_engine_recv_latest_token_timestamp = cur_time
526+
self.model_execute_time = cur_time - self.arrival_time
527+
self.model_forward_time = cur_time - self.inference_start_time
526528

527529
def record_decode_recv_second_token(self):
528530
cur_time = time.time()
@@ -540,7 +542,6 @@ def cal_cost_time(self):
540542
self.first_token_time = self.engine_recv_first_token_time - self.inference_start_time
541543
self.time_in_queue = time.time() - self.preprocess_end_time
542544
self.preprocess_cost_time = self.preprocess_end_time - self.preprocess_start_time
543-
self.model_execute_time = self.engine_recv_first_token_time - self.inference_start_time
544545
self.request_start_time = self.arrival_time
545546

546547
# for compatibility with old metrics
@@ -623,10 +624,12 @@ def add(self, next_output: RequestOutput) -> None:
623624
self.outputs.index = next_output.outputs.index
624625
self.outputs.token_ids.extend(next_output.outputs.token_ids)
625626

626-
if next_output.metrics.arrival_time is not None and self.metrics.inference_start_time is not None:
627-
self.metrics.model_forward_time = next_output.metrics.arrival_time - self.metrics.inference_start_time
628-
if next_output.metrics.arrival_time is not None and self.metrics.arrival_time is not None:
629-
self.metrics.model_execute_time = next_output.metrics.arrival_time - self.metrics.arrival_time
627+
if next_output.metrics.model_forward_time is not None:
628+
self.metrics.model_forward_time = next_output.metrics.model_forward_time
629+
if next_output.metrics.model_execute_time is not None:
630+
self.metrics.model_execute_time = next_output.metrics.model_execute_time
631+
if next_output.metrics.engine_recv_latest_token_time is not None:
632+
self.metrics.engine_recv_latest_token_time = next_output.metrics.engine_recv_latest_token_time
630633
if next_output.outputs.top_logprobs is not None:
631634
self.outputs.top_logprobs.logprob_token_ids.extend(next_output.outputs.top_logprobs.logprob_token_ids)
632635
self.outputs.top_logprobs.logprobs.extend(next_output.outputs.top_logprobs.logprobs)

0 commit comments

Comments
 (0)