@@ -892,6 +892,10 @@ struct Args {
892892 /// Using this flag reallows users to ask for them.
893893 #[ clap( long, env) ]
894894 enable_prefill_logprobs : bool ,
895+
896+ /// Timeout, in seconds, for the graceful termination of the TGI shard and webserver processes
897+ #[ clap( default_value = "90" , long, short, env) ]
898+ graceful_termination_timeout : u64 ,
895899}
896900
897901#[ derive( Debug ) ]
@@ -933,6 +937,7 @@ fn shard_manager(
933937 log_level : LevelFilter ,
934938 status_sender : mpsc:: Sender < ShardStatus > ,
935939 shutdown : Arc < AtomicBool > ,
940+ graceful_termination_timeout : u64 ,
936941 _shutdown_sender : mpsc:: Sender < ( ) > ,
937942) {
938943 // Enter shard-manager tracing span
@@ -1206,7 +1211,12 @@ fn shard_manager(
12061211
12071212 // We received a shutdown signal
12081213 if shutdown. load ( Ordering :: SeqCst ) {
1209- terminate ( "shard" , p, Duration :: from_secs ( 90 ) ) . unwrap ( ) ;
1214+ terminate (
1215+ "shard" ,
1216+ p,
1217+ Duration :: from_secs ( graceful_termination_timeout) ,
1218+ )
1219+ . unwrap ( ) ;
12101220 return ;
12111221 }
12121222
@@ -1545,6 +1555,7 @@ fn spawn_shards(
15451555 status_receiver : & mpsc:: Receiver < ShardStatus > ,
15461556 status_sender : mpsc:: Sender < ShardStatus > ,
15471557 running : Arc < AtomicBool > ,
1558+ graceful_termination_timeout : u64 ,
15481559) -> Result < ( ) , LauncherError > {
15491560 // Start shard processes
15501561 for rank in 0 ..num_shard {
@@ -1612,6 +1623,7 @@ fn spawn_shards(
16121623 max_log_level,
16131624 status_sender,
16141625 shutdown,
1626+ graceful_termination_timeout,
16151627 shutdown_sender,
16161628 )
16171629 } ) ;
@@ -1999,6 +2011,8 @@ fn main() -> Result<(), LauncherError> {
19992011 // Pattern match configuration
20002012 let args: Args = Args :: parse ( ) ;
20012013
2014+ let graceful_termination_timeout = args. graceful_termination_timeout ;
2015+
20022016 // Filter events with LOG_LEVEL
20032017 let varname = "LOG_LEVEL" ;
20042018 let env_filter = if let Ok ( log_level) = std:: env:: var ( varname) {
@@ -2263,6 +2277,7 @@ fn main() -> Result<(), LauncherError> {
22632277 & status_receiver,
22642278 status_sender,
22652279 running. clone ( ) ,
2280+ graceful_termination_timeout,
22662281 ) ?;
22672282
22682283 // We might have received a termination signal
@@ -2307,7 +2322,12 @@ fn main() -> Result<(), LauncherError> {
23072322 }
23082323
23092324 // Graceful termination
2310- terminate ( "webserver" , webserver, Duration :: from_secs ( 90 ) ) . unwrap ( ) ;
2325+ terminate (
2326+ "webserver" ,
2327+ webserver,
2328+ Duration :: from_secs ( graceful_termination_timeout) ,
2329+ )
2330+ . unwrap ( ) ;
23112331 shutdown_shards ( shutdown, & shutdown_receiver) ;
23122332
23132333 exit_code
0 commit comments