Skip to content

Commit fb338e7

Browse files
ayushr2 authored and gvisor-bot committed
Mark restore as failed when any runsc restore fails.
Follow-up to 0f83514 ("Handle restore failure more robustly.").

PiperOrigin-RevId: 832089278
1 parent 2b2f897 commit fb338e7

File tree

2 files changed

+14
-4
lines changed

2 files changed

+14
-4
lines changed

runsc/boot/controller.go

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -537,7 +537,7 @@ type RestoreOpts struct {
537537
// The container's current kernel is destroyed, a restore environment is
538538
// created, and the kernel is recreated with the restore state file. The
539539
// container then sends the signal to start.
540-
func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
540+
func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) (retErr error) {
541541
timer := starttime.Timer("Restore")
542542
timer.Reached("cm.Restore RPC")
543543
log.Debugf("containerManager.Restore")
@@ -547,9 +547,15 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
547547
cu := cleanup.Make(cm.l.mu.Unlock)
548548
defer cu.Clean()
549549

550-
if cm.l.state > created {
550+
if cm.l.state != created {
551551
return fmt.Errorf("cannot restore a container in state=%s", cm.l.state)
552552
}
553+
defer func() {
554+
if retErr != nil {
555+
cu.Clean() // Release `cm.l.mu` as onRestoreFailed will acquire it.
556+
cm.onRestoreFailed(fmt.Errorf("Restore failed: %w", retErr))
557+
}
558+
}()
553559
if len(o.Files) == 0 {
554560
return fmt.Errorf("at least one file must be passed to Restore")
555561
}
@@ -703,7 +709,7 @@ func (cm *containerManager) onRestoreDone() {
703709
cm.restorer = nil
704710
}
705711

706-
func (cm *containerManager) RestoreSubcontainer(args *StartArgs, _ *struct{}) error {
712+
func (cm *containerManager) RestoreSubcontainer(args *StartArgs, _ *struct{}) (retErr error) {
707713
timeline := timing.OrphanTimeline(fmt.Sprintf("cont:%s", args.CID[0:min(8, len(args.CID))]), gtime.Now()).Lease()
708714
defer timeline.End()
709715
log.Debugf("containerManager.RestoreSubcontainer, cid: %s, args: %+v", args.CID, args)
@@ -714,6 +720,11 @@ func (cm *containerManager) RestoreSubcontainer(args *StartArgs, _ *struct{}) er
714720
if state != restoringUnstarted {
715721
return fmt.Errorf("sandbox is not being restored, cannot restore subcontainer: state=%s", state)
716722
}
723+
defer func() {
724+
if retErr != nil {
725+
cm.onRestoreFailed(fmt.Errorf("RestoreSubcontainer failed: %w", retErr))
726+
}
727+
}()
717728

718729
// Validate arguments.
719730
if args.Spec == nil {

runsc/boot/restore.go

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -159,7 +159,6 @@ func (r *restorer) restoreContainerInfo(l *Loader, info *containerInfo, containe
159159
if len(r.containers) == r.totalContainers {
160160
// Trigger the restore if this is the last container.
161161
if err := r.restore(l); err != nil {
162-
r.cm.onRestoreFailed(err)
163162
return err
164163
}
165164
}

0 commit comments

Comments
 (0)