Skip to content

Commit 1262923

Browse files
authored
Merge pull request #3904 from olamilekan000/add-timeout-context-to-progress-monitoring
add timeout to cloud init progress monitoring
2 parents f7861fb + 3f8402c commit 1262923

File tree

2 files changed

+76
-44
lines changed

2 files changed

+76
-44
lines changed

pkg/hostagent/hostagent.go

Lines changed: 75 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ type HostAgent struct {
7777
guestAgentAliveChOnce sync.Once
7878

7979
showProgress bool // whether to show cloud-init progress
80+
81+
statusMu sync.RWMutex
82+
currentStatus events.Status
8083
}
8184

8285
type options struct {
@@ -285,6 +288,11 @@ func determineSSHLocalPort(confLocalPort int, instName, limaVersion string) (int
285288
func (a *HostAgent) emitEvent(_ context.Context, ev events.Event) {
286289
a.eventEncMu.Lock()
287290
defer a.eventEncMu.Unlock()
291+
292+
a.statusMu.Lock()
293+
a.currentStatus = ev.Status
294+
a.statusMu.Unlock()
295+
288296
if ev.Time.IsZero() {
289297
ev.Time = time.Now()
290298
}
@@ -293,6 +301,17 @@ func (a *HostAgent) emitEvent(_ context.Context, ev events.Event) {
293301
}
294302
}
295303

304+
func (a *HostAgent) emitCloudInitProgressEvent(ctx context.Context, progress *events.CloudInitProgress) {
305+
a.statusMu.RLock()
306+
currentStatus := a.currentStatus
307+
a.statusMu.RUnlock()
308+
309+
currentStatus.CloudInitProgress = progress
310+
311+
ev := events.Event{Status: currentStatus}
312+
a.emitEvent(ctx, ev)
313+
}
314+
296315
func generatePassword(length int) (string, error) {
297316
// avoid any special symbols, to make it easier to copy/paste
298317
return password.Generate(length, length/4, 0, false, false)
@@ -513,7 +532,10 @@ sudo chown -R "${USER}" /run/host-services`
513532
if a.showProgress {
514533
cloudInitDone := make(chan struct{})
515534
go func() {
516-
a.watchCloudInitProgress(ctx)
535+
timeoutCtx, cancel := context.WithTimeout(ctx, 10*time.Minute)
536+
defer cancel()
537+
538+
a.watchCloudInitProgress(timeoutCtx)
517539
close(cloudInitDone)
518540
}()
519541

@@ -820,15 +842,22 @@ func forwardSSH(ctx context.Context, sshConfig *ssh.SSHConfig, port int, local,
820842
}
821843

822844
func (a *HostAgent) watchCloudInitProgress(ctx context.Context) {
845+
exitReason := "Cloud-init monitoring completed successfully"
846+
var cmd *exec.Cmd
847+
848+
defer func() {
849+
a.emitCloudInitProgressEvent(context.Background(), &events.CloudInitProgress{
850+
Active: false,
851+
Completed: true,
852+
LogLine: exitReason,
853+
})
854+
logrus.Debug("Cloud-init progress monitoring completed")
855+
}()
856+
823857
logrus.Debug("Starting cloud-init progress monitoring")
824858

825-
a.emitEvent(ctx, events.Event{
826-
Status: events.Status{
827-
SSHLocalPort: a.sshLocalPort,
828-
CloudInitProgress: &events.CloudInitProgress{
829-
Active: true,
830-
},
831-
},
859+
a.emitCloudInitProgressEvent(ctx, &events.CloudInitProgress{
860+
Active: true,
832861
})
833862

834863
maxRetries := 30
@@ -866,15 +895,17 @@ func (a *HostAgent) watchCloudInitProgress(ctx context.Context) {
866895
"sudo", "tail", "-n", "+1", "-f", "/var/log/cloud-init-output.log",
867896
)
868897

869-
cmd := exec.CommandContext(ctx, a.sshConfig.Binary(), args...)
898+
cmd = exec.CommandContext(ctx, a.sshConfig.Binary(), args...)
870899
stdout, err := cmd.StdoutPipe()
871900
if err != nil {
872901
logrus.WithError(err).Warn("Failed to create stdout pipe for cloud-init monitoring")
902+
exitReason = "Failed to create stdout pipe for cloud-init monitoring"
873903
return
874904
}
875905

876906
if err := cmd.Start(); err != nil {
877907
logrus.WithError(err).Warn("Failed to start cloud-init monitoring command")
908+
exitReason = "Failed to start cloud-init monitoring command"
878909
return
879910
}
880911

@@ -887,23 +918,37 @@ func (a *HostAgent) watchCloudInitProgress(ctx context.Context) {
887918
continue
888919
}
889920

890-
if strings.Contains(line, "Cloud-init") && strings.Contains(line, "finished") {
891-
cloudInitFinished = true
921+
if !cloudInitFinished {
922+
if isCloudInitFinished(line) {
923+
logrus.Debug("Cloud-init completion detected via log pattern")
924+
cloudInitFinished = true
925+
}
892926
}
893927

894-
a.emitEvent(ctx, events.Event{
895-
Status: events.Status{
896-
SSHLocalPort: a.sshLocalPort,
897-
CloudInitProgress: &events.CloudInitProgress{
898-
Active: !cloudInitFinished,
899-
LogLine: line,
900-
Completed: cloudInitFinished,
901-
},
902-
},
928+
a.emitCloudInitProgressEvent(ctx, &events.CloudInitProgress{
929+
Active: !cloudInitFinished,
930+
LogLine: line,
931+
Completed: cloudInitFinished,
903932
})
933+
934+
if cloudInitFinished {
935+
logrus.Debug("Breaking from cloud-init monitoring loop - completion detected")
936+
if cmd.Process != nil {
937+
logrus.Debug("Killing cloud-init monitoring process after completion")
938+
if err := cmd.Process.Kill(); err != nil {
939+
logrus.WithError(err).Debug("Failed to kill cloud-init monitoring process")
940+
}
941+
}
942+
break
943+
}
904944
}
905945

906946
if err := cmd.Wait(); err != nil {
947+
if ctx.Err() == context.DeadlineExceeded {
948+
logrus.Warn("Cloud-init monitoring timed out after 10 minutes")
949+
exitReason = "Cloud-init monitoring timed out after 10 minutes"
950+
return
951+
}
907952
logrus.WithError(err).Debug("SSH command finished (expected when cloud-init completes)")
908953
}
909954

@@ -922,36 +967,24 @@ func (a *HostAgent) watchCloudInitProgress(ctx context.Context) {
922967
lines := strings.Split(string(finalOutput), "\n")
923968
for _, line := range lines {
924969
if strings.TrimSpace(line) != "" {
925-
if strings.Contains(line, "Cloud-init") && strings.Contains(line, "finished") {
926-
cloudInitFinished = true
970+
if !cloudInitFinished {
971+
cloudInitFinished = isCloudInitFinished(line)
927972
}
928973

929-
a.emitEvent(ctx, events.Event{
930-
Status: events.Status{
931-
SSHLocalPort: a.sshLocalPort,
932-
CloudInitProgress: &events.CloudInitProgress{
933-
Active: !cloudInitFinished,
934-
LogLine: line,
935-
Completed: cloudInitFinished,
936-
},
937-
},
974+
a.emitCloudInitProgressEvent(ctx, &events.CloudInitProgress{
975+
Active: !cloudInitFinished,
976+
LogLine: line,
977+
Completed: cloudInitFinished,
938978
})
939979
}
940980
}
941981
}
942982
}
983+
}
943984

944-
a.emitEvent(ctx, events.Event{
945-
Status: events.Status{
946-
SSHLocalPort: a.sshLocalPort,
947-
CloudInitProgress: &events.CloudInitProgress{
948-
Active: false,
949-
Completed: true,
950-
},
951-
},
952-
})
953-
954-
logrus.Debug("Cloud-init progress monitoring completed")
985+
// isCloudInitFinished reports whether line contains both "cloud-init" and
// "finished", compared case-insensitively.
//
// The match is a plain substring test, so the two words may appear anywhere
// in the line and in either order. Surrounding whitespace is irrelevant to a
// substring search, so the line is not trimmed first.
func isCloudInitFinished(line string) bool {
	lowered := strings.ToLower(line)
	return strings.Contains(lowered, "cloud-init") && strings.Contains(lowered, "finished")
}
956989

957990
func copyToHost(ctx context.Context, sshConfig *ssh.SSHConfig, port int, local, remote string) error {

pkg/instance/start.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -332,9 +332,9 @@ func watchHostAgentEvents(ctx context.Context, inst *limatype.Instance, haStdout
332332

333333
if progress.Completed {
334334
cloudInitCompleted = true
335+
logrus.Infof("Cloud-init progress monitoring done.")
335336
}
336337
}
337-
338338
if len(ev.Status.Errors) > 0 {
339339
logrus.Errorf("%+v", ev.Status.Errors)
340340
}
@@ -355,7 +355,6 @@ func watchHostAgentEvents(ctx context.Context, inst *limatype.Instance, haStdout
355355
}
356356

357357
if showProgress && !cloudInitCompleted {
358-
logrus.Infof("VM is running, waiting for cloud-init to complete...")
359358
return false
360359
}
361360

0 commit comments

Comments
 (0)