@@ -204,6 +204,14 @@ class LlamaRunner:
204204 def _add_llama_args (self , cmd : List [str ], args : argparse .Namespace ) -> List [str ]:
205205 """Add LLaMA-specific arguments to the command."""
206206
207+ # For service mode, only pass the model path and max-tokens
208+ if hasattr (args , 'service' ) and args .service :
209+ llama_args = [
210+ "--model" , args .model_path ,
211+ "--max-tokens" , str (args .max_tokens ),
212+ ]
213+ return cmd + llama_args
214+
207215 llama_args = [
208216 "--model" , args .model_path ,
209217 "--temperature" , str (args .temperature ),
@@ -238,14 +246,19 @@ class LlamaRunner:
238246
239247 # Show service-specific information
240248 if args .service :
241- print ("Starting TornadoVM LLM REST API Service..." )
249+ print ("Starting GPULlama3.java REST API Service..." )
242250 print (f"Model: { args .model_path } " )
243- print ("API endpoints will be available at:" )
244- print (" - http://localhost:8080/v1/completions" )
245- print (" - http://localhost:8080/v1/completions/stream" )
246- print (" - http://localhost:8080/v1/models" )
247- print (" - http://localhost:8080/v1/health" )
248- print ("\n Press Ctrl+C to stop the service" )
251+ print ("API endpoints available at:" )
252+ print (" - http://localhost:8080/chat" )
253+ print (" - http://localhost:8080/chat/stream" )
254+ print (" - http://localhost:8080/health" )
255+ print ("" )
256+ print ("Example usage:" )
257+ print (' curl -X POST http://localhost:8080/chat \\ ' )
258+ print (' -H "Content-Type: application/json" \\ ' )
259+ print (' -d \' {"message": "Hello!"}\' ' )
260+ print ("" )
261+ print ("Press Ctrl+C to stop the service" )
249262 print ("-" * 60 )
250263
251264
0 commit comments