@@ -185,6 +185,14 @@ class LlamaRunner:
185185 def _add_llama_args (self , cmd : List [str ], args : argparse .Namespace ) -> List [str ]:
186186 """Add LLaMA-specific arguments to the command."""
187187
188+ # For service mode, only pass the model path and max-tokens
189+ if hasattr (args , 'service' ) and args .service :
190+ llama_args = [
191+ "--model" , args .model_path ,
192+ "--max-tokens" , str (args .max_tokens ),
193+ ]
194+ return cmd + llama_args
195+
188196 llama_args = [
189197 "--model" , args .model_path ,
190198 "--temperature" , str (args .temperature ),
@@ -219,14 +227,19 @@ class LlamaRunner:
219227
220228 # Show service-specific information
221229 if args .service :
222- print ("Starting TornadoVM LLM REST API Service..." )
230+ print ("Starting GPULlama3.java REST API Service..." )
223231 print (f"Model: { args .model_path } " )
224- print ("API endpoints will be available at:" )
225- print (" - http://localhost:8080/v1/completions" )
226- print (" - http://localhost:8080/v1/completions/stream" )
227- print (" - http://localhost:8080/v1/models" )
228- print (" - http://localhost:8080/v1/health" )
229- print ("\n Press Ctrl+C to stop the service" )
232+ print ("API endpoints available at:" )
233+ print (" - http://localhost:8080/chat" )
234+ print (" - http://localhost:8080/chat/stream" )
235+ print (" - http://localhost:8080/health" )
236+ print ("" )
237+ print ("Example usage:" )
238+ print (' curl -X POST http://localhost:8080/chat \\ ' )
239+ print (' -H "Content-Type: application/json" \\ ' )
240+ print (' -d \' {"message": "Hello!"}\' ' )
241+ print ("" )
242+ print ("Press Ctrl+C to stop the service" )
230243 print ("-" * 60 )
231244
232245
0 commit comments