
Commit 27b0045

Cleanup code
1 parent 30fb9b4 commit 27b0045

9 files changed: +46 -170 lines changed


examples/LPU_inference.py

Lines changed: 0 additions & 28 deletions
This file was deleted.

examples/lpu_client.py

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ def post_http_request(prompt: str,
         "n": n,
         "use_beam_search": False,
         "temperature": 0.8,
-        "max_tokens": 32,
+        "max_tokens": 40,
         "top_p": 0.95,
         "top_k": 1,
         "stream": stream,

examples/mini_testbench.sh

Lines changed: 6 additions & 8 deletions
@@ -2,13 +2,12 @@
 log_sum="log/service_model_device.txt"
 
 model_ids=("TinyLlama/TinyLlama-1.1B-Chat-v1.0") # "facebook/opt-1.3b" "huggyllama/llama-7b")
-num_devices=(1 2 4)
+num_devices=(2)
 
 current_datetime=$(date "+%Y-%m-%d %H:%M:%S")
 echo "$current_datetime"
 echo "$current_datetime" >> ${log_sum}
 
-"""
 for model_id in "${model_ids[@]}"; do
   for num_device in "${num_devices[@]}"; do
     #IFS='\' read -ra parts <<< "$model_id"
@@ -19,12 +18,11 @@ for model_id in "${model_ids[@]}"; do
     echo "*********************************"
     python lpu_inference_arg.py -m ${model_id} -n ${num_device} > log/inference_${model_name}_${num_device}.txt
     echo "*********************************" >> ${log_sum}
-    echo "The Result of log/inference_${model_name}_${num_device}.txt" >> ${log_sum}
+    echo "[Testbench] The Result of log/inference_${model_name}_${num_device}.txt" >> ${log_sum}
     tail -n 1 "log/inference_${model_name}_${num_device}.txt" >> ${log_sum}
     echo "" >> ${log_sum}
   done
 done
-"""
 
 for model_id in "${model_ids[@]}"; do
   for num_device in "${num_devices[@]}"; do
@@ -36,10 +34,10 @@ for model_id in "${model_ids[@]}"; do
 
     # Waiting for server
     while ! nc -z localhost "8000"; do
-      echo "Waiting for server..."
+      echo "[Testbench] Waiting for server..."
       sleep 3
     done
-    echo "The server is ready!"
+    echo "[Testbench] The server is ready!"
 
     python lpu_client.py > log/vllm_serve_${model_name}_${num_device}.txt
 
@@ -49,10 +47,10 @@ for model_id in "${model_ids[@]}"; do
    kill -SIGINT "$PID"
    while true; do
      if ps -p "$PID" > /dev/null; then
-       echo "Kill the process..."
+       echo "[Testbench] Kill the process..."
        sleep 3
      else
-       echo "Process (PID: $PID) is killed."
+       echo "[Testbench] Process (PID: $PID) is killed."
        break
      fi
    done
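
Taken together, the loops above implement the testbench lifecycle: start the server, poll until port 8000 accepts connections, run the client, then send SIGINT and poll until the process is gone. A Python sketch of the same pattern, assuming a server module listening on localhost:8000 (the command, port, and timings are illustrative):

# Python rendering of the wait-for-port / graceful-kill pattern above.
# The server command and port are assumptions for illustration.
import signal
import socket
import subprocess
import time

def wait_for_port(host: str, port: int, interval: float = 3.0) -> None:
    # Equivalent of the script's `while ! nc -z localhost "8000"` loop.
    while True:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            if sock.connect_ex((host, port)) == 0:
                return
        print("[Testbench] Waiting for server...")
        time.sleep(interval)

server = subprocess.Popen(["python", "-m", "vllm.entrypoints.api_server"])
wait_for_port("localhost", 8000)
print("[Testbench] The server is ready!")

# ... run the client workload here, as lpu_client.py does above ...

server.send_signal(signal.SIGINT)   # kill -SIGINT "$PID"
while server.poll() is None:        # ps -p "$PID" > /dev/null
    print("[Testbench] Kill the process...")
    time.sleep(3)
print(f"[Testbench] Process (PID: {server.pid}) is killed.")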

vllm/core/scheduler.py

Lines changed: 0 additions & 1 deletion
@@ -610,7 +610,6 @@ def _schedule_running(
         else:
             #self._append_slots(seq_group, blocks_to_copy)
             is_prefill = seq_group.is_prefill()
-            print_logger(is_prefill)
             scheduled_seq_group: ScheduledSequenceGroup = \
                 self._scheduled_seq_group_cache[self.cache_id].get_object()
             scheduled_seq_group.seq_group = seq_group

vllm/engine/async_llm_engine.py

Lines changed: 0 additions & 5 deletions
@@ -806,7 +806,6 @@ def shutdown_background_loop(self) -> None:
         if self._background_loop_unshielded is not None:
             self._background_loop_unshielded.cancel()
             self._background_loop_unshielded = None
-        print_logger("shutdown")
         self.background_loop = None
 
     def _init_engine(self, *args,
@@ -935,11 +934,8 @@ async def run_engine_loop(self):
                         asyncio.create_task(
                             self.engine_step(virtual_engine)))
                     has_requests_in_progress[virtual_engine] = True
-                    print_logger(has_unfinished_requests)
                 else:
                     has_requests_in_progress[virtual_engine] = False
-                    print_logger(has_unfinished_requests)
-                    #self.engine.model_executor.cleanup()
             except asyncio.TimeoutError as exc:
                 logger.error(
                     "Engine iteration timed out. This should never happen!")
@@ -1226,7 +1222,6 @@ async def check_health(self) -> None:
         t = time.perf_counter()
         logger.debug("Starting health check...")
         if self.is_stopped:
-            print_logger("is_stopped")
             raise AsyncEngineDeadError("Background loop is stopped.")
 
         if self.engine_use_ray:

vllm/entrypoints/api_server.py

Lines changed: 0 additions & 1 deletion
@@ -131,7 +131,6 @@ async def run_server(args: Namespace,
         ssl_cert_reqs=args.ssl_cert_reqs,
         **uvicorn_kwargs,
     )
-    print_logger("Detect crtl+C")
     await shutdown_task
     engine.engine.model_executor.cleanup()
 

vllm/executor/lpu_executor.py

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ def _init_executor(self) -> None: #HJ: why not __init__ ?
         self.model_config.dtype = torch.bfloat16
 
         # Instantiate the worker and load the model to the device.
-        #vLLM does not use torch distributed library to execute multi-LPU
+        # NOTE(hyunjun): vLLM does not use torch distributed library to execute multi-LPU
         self.num_device = self.parallel_config.tensor_parallel_size
         if self.parallel_config.tensor_parallel_size > 1:
             self.parallel_config.tensor_parallel_size = 1
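
The rewritten comment records the design choice behind these lines: multi-LPU execution does not go through the torch distributed library, so the executor stashes the requested tensor-parallel degree in num_device and then resets vLLM's tensor_parallel_size to 1 before workers are created. A standalone sketch of that pattern (the config class below is a simplified stand-in, not vLLM's own):

# Sketch of the pattern in _init_executor above: remember how many LPU
# devices were requested, then hide the parallelism from vLLM so that no
# torch.distributed process group is initialized. Types are stand-ins.
from dataclasses import dataclass

@dataclass
class ParallelConfig:
    tensor_parallel_size: int = 1

class LPUExecutorSketch:
    def __init__(self, parallel_config: ParallelConfig):
        self.parallel_config = parallel_config
        # Keep the user-requested device count for the LPU runtime...
        self.num_device = self.parallel_config.tensor_parallel_size
        # ...but report tensor_parallel_size=1 to the rest of vLLM.
        if self.parallel_config.tensor_parallel_size > 1:
            self.parallel_config.tensor_parallel_size = 1

executor = LPUExecutorSketch(ParallelConfig(tensor_parallel_size=2))
print(executor.num_device)                            # 2
print(executor.parallel_config.tensor_parallel_size)  # 1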
