@@ -33,6 +33,10 @@ public actor ServiceGroup: Sendable {
3333 private let logger : Logger
3434 /// The logging configuration.
3535 private let loggingConfiguration : ServiceGroupConfiguration . LoggingConfiguration
36+ /// The maximum amount of time that graceful shutdown is allowed to take.
37+ private let maximumGracefulShutdownDuration : ( secondsComponent: Int64 , attosecondsComponent: Int64 ) ?
38+ /// The maximum amount of time that task cancellation is allowed to take.
39+ private let maximumCancellationDuration : ( secondsComponent: Int64 , attosecondsComponent: Int64 ) ?
3640 /// The signals that lead to graceful shutdown.
3741 private let gracefulShutdownSignals : [ UnixSignal ]
3842 /// The signals that lead to cancellation.
@@ -57,6 +61,8 @@ public actor ServiceGroup: Sendable {
5761 self . cancellationSignals = configuration. cancellationSignals
5862 self . logger = configuration. logger
5963 self . loggingConfiguration = configuration. logging
64+ self . maximumGracefulShutdownDuration = configuration. _maximumGracefulShutdownDuration
65+ self . maximumCancellationDuration = configuration. _maximumCancellationDuration
6066 }
6167
6268 /// Initializes a new ``ServiceGroup``.
@@ -94,6 +100,8 @@ public actor ServiceGroup: Sendable {
94100 self . cancellationSignals = configuration. cancellationSignals
95101 self . logger = logger
96102 self . loggingConfiguration = configuration. logging
103+ self . maximumGracefulShutdownDuration = configuration. _maximumGracefulShutdownDuration
104+ self . maximumCancellationDuration = configuration. _maximumCancellationDuration
97105 }
98106
99107 /// Runs all the services by spinning up a child task per service.
@@ -176,6 +184,8 @@ public actor ServiceGroup: Sendable {
176184 case signalSequenceFinished
177185 case gracefulShutdownCaught
178186 case gracefulShutdownFinished
187+ case gracefulShutdownTimedOut
188+ case cancellationCaught
179189 }
180190
181191 private func _run(
@@ -191,6 +201,10 @@ public actor ServiceGroup: Sendable {
191201 ]
192202 )
193203
204+ // A task that is spawned when we got cancelled or
205+ // we cancel the task group to keep track of a timeout.
206+ var cancellationTimeoutTask : Task < Void , Never > ?
207+
194208 // Using a result here since we want a task group that has non-throwing child tasks
195209 // but the body itself is throwing
196210 let result = try await withThrowingTaskGroup ( of: ChildTaskResult . self, returning: Result< Void, Error> . self ) { group in
@@ -267,6 +281,13 @@ public actor ServiceGroup: Sendable {
267281 }
268282 }
269283
284+ group. addTask {
285+ // This child task is waiting forever until the group gets cancelled.
286+ let ( stream, _) = AsyncStream . makeStream ( of: Void . self)
287+ await stream. first { _ in true }
288+ return . cancellationCaught
289+ }
290+
270291 // We are storing the services in an optional array now. When a slot in the array is
271292 // empty it indicates that the service has been shutdown.
272293 var services = services. map { Optional ( $0) }
@@ -293,7 +314,7 @@ public actor ServiceGroup: Sendable {
293314 self . loggingConfiguration. keys. serviceKey: " \( service. service) " ,
294315 ]
295316 )
296- group . cancelAll ( )
317+ self . cancelGroupAndSpawnTimeoutIfNeeded ( group : & group , cancellationTimeoutTask : & cancellationTimeoutTask )
297318 return . failure( ServiceGroupError . serviceFinishedUnexpectedly ( ) )
298319
299320 case . gracefullyShutdownGroup:
@@ -307,6 +328,7 @@ public actor ServiceGroup: Sendable {
307328 do {
308329 try await self . shutdownGracefully (
309330 services: services,
331+ cancellationTimeoutTask: & cancellationTimeoutTask,
310332 group: & group,
311333 gracefulShutdownManagers: gracefulShutdownManagers
312334 )
@@ -327,7 +349,7 @@ public actor ServiceGroup: Sendable {
327349 self . logger. debug (
328350 " All services finished. "
329351 )
330- group . cancelAll ( )
352+ self . cancelGroupAndSpawnTimeoutIfNeeded ( group : & group , cancellationTimeoutTask : & cancellationTimeoutTask )
331353 return . success( ( ) )
332354 }
333355 }
@@ -342,7 +364,7 @@ public actor ServiceGroup: Sendable {
342364 self . loggingConfiguration. keys. errorKey: " \( serviceError) " ,
343365 ]
344366 )
345- group . cancelAll ( )
367+ self . cancelGroupAndSpawnTimeoutIfNeeded ( group : & group , cancellationTimeoutTask : & cancellationTimeoutTask )
346368 return . failure( serviceError)
347369
348370 case . gracefullyShutdownGroup:
@@ -358,6 +380,7 @@ public actor ServiceGroup: Sendable {
358380 do {
359381 try await self . shutdownGracefully (
360382 services: services,
383+ cancellationTimeoutTask: & cancellationTimeoutTask,
361384 group: & group,
362385 gracefulShutdownManagers: gracefulShutdownManagers
363386 )
@@ -381,7 +404,7 @@ public actor ServiceGroup: Sendable {
381404 " All services finished. "
382405 )
383406
384- group . cancelAll ( )
407+ self . cancelGroupAndSpawnTimeoutIfNeeded ( group : & group , cancellationTimeoutTask : & cancellationTimeoutTask )
385408 return . success( ( ) )
386409 }
387410 }
@@ -398,6 +421,7 @@ public actor ServiceGroup: Sendable {
398421 do {
399422 try await self . shutdownGracefully (
400423 services: services,
424+ cancellationTimeoutTask: & cancellationTimeoutTask,
401425 group: & group,
402426 gracefulShutdownManagers: gracefulShutdownManagers
403427 )
@@ -413,7 +437,7 @@ public actor ServiceGroup: Sendable {
413437 ]
414438 )
415439
416- group . cancelAll ( )
440+ self . cancelGroupAndSpawnTimeoutIfNeeded ( group : & group , cancellationTimeoutTask : & cancellationTimeoutTask )
417441 }
418442
419443 case . gracefulShutdownCaught:
@@ -423,19 +447,29 @@ public actor ServiceGroup: Sendable {
423447 do {
424448 try await self . shutdownGracefully (
425449 services: services,
450+ cancellationTimeoutTask: & cancellationTimeoutTask,
426451 group: & group,
427452 gracefulShutdownManagers: gracefulShutdownManagers
428453 )
429454 } catch {
430455 return . failure( error)
431456 }
432457
458+ case . cancellationCaught:
459+ // We caught cancellation in our child task so we have to spawn
460+ // our cancellation timeout task if needed
461+ self . logger. debug ( " Caught cancellation. " )
462+ self . cancelGroupAndSpawnTimeoutIfNeeded ( group: & group, cancellationTimeoutTask: & cancellationTimeoutTask)
463+
433464 case . signalSequenceFinished, . gracefulShutdownFinished:
434465 // This can happen when we are either cancelling everything or
435466 // when the user did not specify any shutdown signals. We just have to tolerate
436467 // this.
437468 continue
438469
470+ case . gracefulShutdownTimedOut:
471+ fatalError ( " Received gracefulShutdownTimedOut but never triggered a graceful shutdown " )
472+
439473 case nil :
440474 fatalError ( " Invalid result from group.next(). We checked if the group is empty before and still got nil " )
441475 }
@@ -447,18 +481,30 @@ public actor ServiceGroup: Sendable {
447481 self . logger. debug (
448482 " Service lifecycle ended "
449483 )
484+ cancellationTimeoutTask? . cancel ( )
450485 try result. get ( )
451486 }
452487
453488 private func shutdownGracefully(
454489 services: [ ServiceGroupConfiguration . ServiceConfiguration ? ] ,
490+ cancellationTimeoutTask: inout Task < Void , Never > ? ,
455491 group: inout ThrowingTaskGroup < ChildTaskResult , Error > ,
456492 gracefulShutdownManagers: [ GracefulShutdownManager ]
457493 ) async throws {
458494 guard case . running = self . state else {
459495 fatalError ( " Unexpected state " )
460496 }
461497
498+ if #available( macOS 13 . 0 , iOS 16 . 0 , watchOS 9 . 0 , tvOS 16 . 0 , * ) , let maximumGracefulShutdownDuration = self . maximumGracefulShutdownDuration {
499+ group. addTask {
500+ try ? await Task . sleep ( for: Duration (
501+ secondsComponent: maximumGracefulShutdownDuration. secondsComponent,
502+ attosecondsComponent: maximumGracefulShutdownDuration. attosecondsComponent
503+ ) )
504+ return . gracefulShutdownTimedOut
505+ }
506+ }
507+
462508 // We are storing the first error of a service that threw here.
463509 var error : Error ?
464510
@@ -509,7 +555,7 @@ public actor ServiceGroup: Sendable {
509555 ]
510556 )
511557
512- group . cancelAll ( )
558+ self . cancelGroupAndSpawnTimeoutIfNeeded ( group : & group , cancellationTimeoutTask : & cancellationTimeoutTask )
513559 throw ServiceGroupError . serviceFinishedUnexpectedly ( )
514560 }
515561
@@ -561,9 +607,26 @@ public actor ServiceGroup: Sendable {
561607 ]
562608 )
563609
564- group . cancelAll ( )
610+ self . cancelGroupAndSpawnTimeoutIfNeeded ( group : & group , cancellationTimeoutTask : & cancellationTimeoutTask )
565611 }
566612
613+ case . gracefulShutdownTimedOut:
614+ // Gracefully shutting down took longer than the user configured
615+ // so we have to escalate it now.
616+ self . logger. debug (
617+ " Graceful shutdown took longer than allowed by the configuration. Cancelling the group now. " ,
618+ metadata: [
619+ self . loggingConfiguration. keys. serviceKey: " \( service. service) " ,
620+ ]
621+ )
622+ self . cancelGroupAndSpawnTimeoutIfNeeded ( group: & group, cancellationTimeoutTask: & cancellationTimeoutTask)
623+
624+ case . cancellationCaught:
625+ // We caught cancellation in our child task so we have to spawn
626+ // our cancellation timeout task if needed
627+ self . logger. debug ( " Caught cancellation. " )
628+ self . cancelGroupAndSpawnTimeoutIfNeeded ( group: & group, cancellationTimeoutTask: & cancellationTimeoutTask)
629+
567630 case . signalSequenceFinished, . gracefulShutdownCaught, . gracefulShutdownFinished:
568631 // We just have to tolerate this since signals and parent graceful shutdowns downs can race.
569632 continue
@@ -575,7 +638,9 @@ public actor ServiceGroup: Sendable {
575638
576639 // If we hit this then all services are shutdown. The only thing remaining
577640 // are the tasks that listen to the various graceful shutdown signals. We
578- // just have to cancel those
641+ // just have to cancel those.
642+ // In this case we don't have to spawn our cancellation timeout task since
643+ // we are sure all other child tasks are handling cancellation appropriately.
579644 group. cancelAll ( )
580645
581646 // If we saw an error during graceful shutdown from a service that triggers graceful
@@ -584,6 +649,45 @@ public actor ServiceGroup: Sendable {
584649 throw error
585650 }
586651 }
652+
653+ private func cancelGroupAndSpawnTimeoutIfNeeded(
654+ group: inout ThrowingTaskGroup < ChildTaskResult , Error > ,
655+ cancellationTimeoutTask: inout Task < Void , Never > ?
656+ ) {
657+ guard cancellationTimeoutTask == nil else {
658+ // We already have a cancellation timeout task running.
659+ self . logger. debug (
660+ " Task cancellation timeout task already running. "
661+ )
662+ return
663+ }
664+ group. cancelAll ( )
665+
666+ if #available( macOS 13 . 0 , iOS 16 . 0 , watchOS 9 . 0 , tvOS 16 . 0 , * ) , let maximumCancellationDuration = self . maximumCancellationDuration {
667+ // We have to spawn an unstructured task here because the call to our `run`
668+ // method might have already been cancelled and we need to protect the sleep
669+ // from being cancelled.
670+ cancellationTimeoutTask = Task {
671+ do {
672+ self . logger. debug (
673+ " Task cancellation timeout task started. "
674+ )
675+ try await Task . sleep ( for: Duration (
676+ secondsComponent: maximumCancellationDuration. secondsComponent,
677+ attosecondsComponent: maximumCancellationDuration. attosecondsComponent
678+ ) )
679+ self . logger. debug (
680+ " Cancellation took longer than allowed by the configuration. "
681+ )
682+ fatalError ( " Cancellation took longer than allowed by the configuration. " )
683+ } catch {
684+ // We got cancelled so our services must have finished up.
685+ }
686+ }
687+ } else {
688+ cancellationTimeoutTask = nil
689+ }
690+ }
587691}
588692
589693// This should be removed once we support Swift 5.9+
0 commit comments