 #!/usr/bin/env python3
 """
-llama-tornado: GPU-accelerated LLaMA.java runner with TornadoVM
-Run LLaMA models using either OpenCL or PTX backends.
+llama-tornado: GPU-accelerated Java LLM runner with TornadoVM
+Run LLM models using either OpenCL or PTX backends.
 """

 import argparse
@@ -19,7 +19,7 @@ class Backend(Enum):
     PTX = "ptx"

 class LlamaRunner:
-    """Main class for managing LLaMA model execution with GPU acceleration."""
+    """Main class for managing LLM execution with GPU acceleration."""

     def __init__(self):
         self.java_home = os.environ.get('JAVA_HOME')
@@ -266,29 +266,29 @@ def create_parser() -> argparse.ArgumentParser:
266266 """Create and configure the argument parser."""
267267 parser = argparse .ArgumentParser (
268268 prog = "llama-tornado" ,
269- description = "GPU-accelerated LLaMA.java model runner using TornadoVM" ,
269+ description = "GPU-accelerated LLM runner using TornadoVM" ,
270270 formatter_class = argparse .ArgumentDefaultsHelpFormatter
271271 )
272272
273273 # Required arguments
274274 parser .add_argument ("--model" , dest = "model_path" , required = True ,
275- help = "Path to the LLaMA model file (e.g., Llama-3.2-1B-Instruct-Q8_0.gguf)" )
275+ help = "Path to the LLM gguf file (e.g., Llama-3.2-1B-Instruct-Q8_0.gguf)" )
276276
277- # LLaMA arguments
278- llama_group = parser .add_argument_group ("LLaMA Configuration" )
279- llama_group .add_argument ("--prompt" , help = "Input prompt for the model" )
280- llama_group .add_argument ("-sp" , "--system-prompt" , help = "System prompt for the model" )
281- llama_group .add_argument ("--temperature" , type = float , default = 0.1 ,
277+ # LLM arguments
278+ llm_group = parser .add_argument_group ("LLaMA Configuration" )
279+ llm_group .add_argument ("--prompt" , help = "Input prompt for the model" )
280+ llm_group .add_argument ("-sp" , "--system-prompt" , help = "System prompt for the model" )
281+ llm_group .add_argument ("--temperature" , type = float , default = 0.1 ,
282282 help = "Sampling temperature (0.0 to 2.0)" )
283- llama_group .add_argument ("--top-p" , type = float , default = 0.95 ,
283+ llm_group .add_argument ("--top-p" , type = float , default = 0.95 ,
284284 help = "Top-p sampling parameter" )
285- llama_group .add_argument ("--seed" , type = int , default = None ,
285+ llm_group .add_argument ("--seed" , type = int , default = None ,
286286 help = "Random seed (default: current timestamp)" )
287- llama_group .add_argument ("-n" , "--max-tokens" , type = int , default = 512 ,
287+ llm_group .add_argument ("-n" , "--max-tokens" , type = int , default = 512 ,
288288 help = "Maximum number of tokens to generate" )
289- llama_group .add_argument ("--stream" , type = bool , default = True ,
289+ llm_group .add_argument ("--stream" , type = bool , default = True ,
290290 help = "Enable streaming output" )
291- llama_group .add_argument ("--echo" , type = bool , default = False ,
291+ llm_group .add_argument ("--echo" , type = bool , default = False ,
292292 help = "Echo the input prompt" )
293293 llm_group .add_argument ("--suffix" , help = "Suffix for fill-in-the-middle request (Codestral)" )
294294
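As context for the rename above, a minimal standalone sketch (not taken from the script) of how the renamed group behaves: add_argument_group only affects the section heading printed by --help, so switching from llama_group to llm_group changes nothing about parsing itself, and namespace attributes such as args.temperature and args.max_tokens keep their names.

# Hypothetical, self-contained illustration of the renamed argument group.
import argparse

parser = argparse.ArgumentParser(prog="llama-tornado")
parser.add_argument("--model", dest="model_path", required=True)
llm_group = parser.add_argument_group("LLaMA Configuration")
llm_group.add_argument("--prompt", help="Input prompt for the model")
llm_group.add_argument("--temperature", type=float, default=0.1)
llm_group.add_argument("-n", "--max-tokens", type=int, default=512)

# Group membership does not change the parsed result; only --help layout differs.
args = parser.parse_args([
    "--model", "Llama-3.2-1B-Instruct-Q8_0.gguf",
    "--prompt", "Why is the sky blue?",
    "-n", "128",
])
print(args.model_path, args.temperature, args.max_tokens)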