Skip to content

Commit e5503eb

Browse files
ErikKaum and Narsil authored
configurable termination timeout (#3126)
* make shard and webserver termination timeouts configurable
* Updating documentation.
* Fmt.

---------

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
1 parent e497bc0 commit e5503eb

File tree

2 files changed

+31
-2
lines changed

2 files changed

+31
-2
lines changed

docs/source/reference/launcher.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,15 @@ Options:
477477

478478
[env: ENABLE_PREFILL_LOGPROBS=]
479479

480+
```
481+
## GRACEFUL_TERMINATION_TIMEOUT
482+
```shell
483+
-g, --graceful-termination-timeout <GRACEFUL_TERMINATION_TIMEOUT>
484+
Change timeout of graceful termination of the TGI server
485+
486+
[env: GRACEFUL_TERMINATION_TIMEOUT=]
487+
[default: 90]
488+
480489
```
481490
## HELP
482491
```shell

launcher/src/main.rs

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -892,6 +892,10 @@ struct Args {
892892
/// Using this flag reallows users to ask for them.
893893
#[clap(long, env)]
894894
enable_prefill_logprobs: bool,
895+
896+
/// Change timeout of graceful termination of the TGI server
897+
#[clap(default_value = "90", long, short, env)]
898+
graceful_termination_timeout: u64,
895899
}
896900

897901
#[derive(Debug)]
@@ -933,6 +937,7 @@ fn shard_manager(
933937
log_level: LevelFilter,
934938
status_sender: mpsc::Sender<ShardStatus>,
935939
shutdown: Arc<AtomicBool>,
940+
graceful_termination_timeout: u64,
936941
_shutdown_sender: mpsc::Sender<()>,
937942
) {
938943
// Enter shard-manager tracing span
@@ -1206,7 +1211,12 @@ fn shard_manager(
12061211

12071212
// We received a shutdown signal
12081213
if shutdown.load(Ordering::SeqCst) {
1209-
terminate("shard", p, Duration::from_secs(90)).unwrap();
1214+
terminate(
1215+
"shard",
1216+
p,
1217+
Duration::from_secs(graceful_termination_timeout),
1218+
)
1219+
.unwrap();
12101220
return;
12111221
}
12121222

@@ -1545,6 +1555,7 @@ fn spawn_shards(
15451555
status_receiver: &mpsc::Receiver<ShardStatus>,
15461556
status_sender: mpsc::Sender<ShardStatus>,
15471557
running: Arc<AtomicBool>,
1558+
graceful_termination_timeout: u64,
15481559
) -> Result<(), LauncherError> {
15491560
// Start shard processes
15501561
for rank in 0..num_shard {
@@ -1612,6 +1623,7 @@ fn spawn_shards(
16121623
max_log_level,
16131624
status_sender,
16141625
shutdown,
1626+
graceful_termination_timeout,
16151627
shutdown_sender,
16161628
)
16171629
});
@@ -1999,6 +2011,8 @@ fn main() -> Result<(), LauncherError> {
19992011
// Pattern match configuration
20002012
let args: Args = Args::parse();
20012013

2014+
let graceful_termination_timeout = args.graceful_termination_timeout;
2015+
20022016
// Filter events with LOG_LEVEL
20032017
let varname = "LOG_LEVEL";
20042018
let env_filter = if let Ok(log_level) = std::env::var(varname) {
@@ -2263,6 +2277,7 @@ fn main() -> Result<(), LauncherError> {
22632277
&status_receiver,
22642278
status_sender,
22652279
running.clone(),
2280+
graceful_termination_timeout,
22662281
)?;
22672282

22682283
// We might have received a termination signal
@@ -2307,7 +2322,12 @@ fn main() -> Result<(), LauncherError> {
23072322
}
23082323

23092324
// Graceful termination
2310-
terminate("webserver", webserver, Duration::from_secs(90)).unwrap();
2325+
terminate(
2326+
"webserver",
2327+
webserver,
2328+
Duration::from_secs(graceful_termination_timeout),
2329+
)
2330+
.unwrap();
23112331
shutdown_shards(shutdown, &shutdown_receiver);
23122332

23132333
exit_code

0 commit comments

Comments
 (0)