Skip to content

Commit 3f8402c

Browse files
committed
add timeout to cloud init progress monitoring
Signed-off-by: olalekan odukoya <odukoyaonline@gmail.com>
1 parent d4f4911 commit 3f8402c

File tree

2 files changed

+76
-44
lines changed

2 files changed

+76
-44
lines changed

pkg/hostagent/hostagent.go

Lines changed: 75 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,9 @@ type HostAgent struct {
7676
guestAgentAliveChOnce sync.Once
7777

7878
showProgress bool // whether to show cloud-init progress
79+
80+
statusMu sync.RWMutex
81+
currentStatus events.Status
7982
}
8083

8184
type options struct {
@@ -284,6 +287,11 @@ func determineSSHLocalPort(confLocalPort int, instName, limaVersion string) (int
284287
func (a *HostAgent) emitEvent(_ context.Context, ev events.Event) {
285288
a.eventEncMu.Lock()
286289
defer a.eventEncMu.Unlock()
290+
291+
a.statusMu.Lock()
292+
a.currentStatus = ev.Status
293+
a.statusMu.Unlock()
294+
287295
if ev.Time.IsZero() {
288296
ev.Time = time.Now()
289297
}
@@ -292,6 +300,17 @@ func (a *HostAgent) emitEvent(_ context.Context, ev events.Event) {
292300
}
293301
}
294302

303+
func (a *HostAgent) emitCloudInitProgressEvent(ctx context.Context, progress *events.CloudInitProgress) {
304+
a.statusMu.RLock()
305+
currentStatus := a.currentStatus
306+
a.statusMu.RUnlock()
307+
308+
currentStatus.CloudInitProgress = progress
309+
310+
ev := events.Event{Status: currentStatus}
311+
a.emitEvent(ctx, ev)
312+
}
313+
295314
func generatePassword(length int) (string, error) {
296315
// avoid any special symbols, to make it easier to copy/paste
297316
return password.Generate(length, length/4, 0, false, false)
@@ -509,7 +528,10 @@ sudo chown -R "${USER}" /run/host-services`
509528
if a.showProgress {
510529
cloudInitDone := make(chan struct{})
511530
go func() {
512-
a.watchCloudInitProgress(ctx)
531+
timeoutCtx, cancel := context.WithTimeout(ctx, 10*time.Minute)
532+
defer cancel()
533+
534+
a.watchCloudInitProgress(timeoutCtx)
513535
close(cloudInitDone)
514536
}()
515537

@@ -816,15 +838,22 @@ func forwardSSH(ctx context.Context, sshConfig *ssh.SSHConfig, port int, local,
816838
}
817839

818840
func (a *HostAgent) watchCloudInitProgress(ctx context.Context) {
841+
exitReason := "Cloud-init monitoring completed successfully"
842+
var cmd *exec.Cmd
843+
844+
defer func() {
845+
a.emitCloudInitProgressEvent(context.Background(), &events.CloudInitProgress{
846+
Active: false,
847+
Completed: true,
848+
LogLine: exitReason,
849+
})
850+
logrus.Debug("Cloud-init progress monitoring completed")
851+
}()
852+
819853
logrus.Debug("Starting cloud-init progress monitoring")
820854

821-
a.emitEvent(ctx, events.Event{
822-
Status: events.Status{
823-
SSHLocalPort: a.sshLocalPort,
824-
CloudInitProgress: &events.CloudInitProgress{
825-
Active: true,
826-
},
827-
},
855+
a.emitCloudInitProgressEvent(ctx, &events.CloudInitProgress{
856+
Active: true,
828857
})
829858

830859
maxRetries := 30
@@ -862,15 +891,17 @@ func (a *HostAgent) watchCloudInitProgress(ctx context.Context) {
862891
"sudo", "tail", "-n", "+1", "-f", "/var/log/cloud-init-output.log",
863892
)
864893

865-
cmd := exec.CommandContext(ctx, a.sshConfig.Binary(), args...)
894+
cmd = exec.CommandContext(ctx, a.sshConfig.Binary(), args...)
866895
stdout, err := cmd.StdoutPipe()
867896
if err != nil {
868897
logrus.WithError(err).Warn("Failed to create stdout pipe for cloud-init monitoring")
898+
exitReason = "Failed to create stdout pipe for cloud-init monitoring"
869899
return
870900
}
871901

872902
if err := cmd.Start(); err != nil {
873903
logrus.WithError(err).Warn("Failed to start cloud-init monitoring command")
904+
exitReason = "Failed to start cloud-init monitoring command"
874905
return
875906
}
876907

@@ -883,23 +914,37 @@ func (a *HostAgent) watchCloudInitProgress(ctx context.Context) {
883914
continue
884915
}
885916

886-
if strings.Contains(line, "Cloud-init") && strings.Contains(line, "finished") {
887-
cloudInitFinished = true
917+
if !cloudInitFinished {
918+
if isCloudInitFinished(line) {
919+
logrus.Debug("Cloud-init completion detected via log pattern")
920+
cloudInitFinished = true
921+
}
888922
}
889923

890-
a.emitEvent(ctx, events.Event{
891-
Status: events.Status{
892-
SSHLocalPort: a.sshLocalPort,
893-
CloudInitProgress: &events.CloudInitProgress{
894-
Active: !cloudInitFinished,
895-
LogLine: line,
896-
Completed: cloudInitFinished,
897-
},
898-
},
924+
a.emitCloudInitProgressEvent(ctx, &events.CloudInitProgress{
925+
Active: !cloudInitFinished,
926+
LogLine: line,
927+
Completed: cloudInitFinished,
899928
})
929+
930+
if cloudInitFinished {
931+
logrus.Debug("Breaking from cloud-init monitoring loop - completion detected")
932+
if cmd.Process != nil {
933+
logrus.Debug("Killing cloud-init monitoring process after completion")
934+
if err := cmd.Process.Kill(); err != nil {
935+
logrus.WithError(err).Debug("Failed to kill cloud-init monitoring process")
936+
}
937+
}
938+
break
939+
}
900940
}
901941

902942
if err := cmd.Wait(); err != nil {
943+
if ctx.Err() == context.DeadlineExceeded {
944+
logrus.Warn("Cloud-init monitoring timed out after 10 minutes")
945+
exitReason = "Cloud-init monitoring timed out after 10 minutes"
946+
return
947+
}
903948
logrus.WithError(err).Debug("SSH command finished (expected when cloud-init completes)")
904949
}
905950

@@ -918,36 +963,24 @@ func (a *HostAgent) watchCloudInitProgress(ctx context.Context) {
918963
lines := strings.Split(string(finalOutput), "\n")
919964
for _, line := range lines {
920965
if strings.TrimSpace(line) != "" {
921-
if strings.Contains(line, "Cloud-init") && strings.Contains(line, "finished") {
922-
cloudInitFinished = true
966+
if !cloudInitFinished {
967+
cloudInitFinished = isCloudInitFinished(line)
923968
}
924969

925-
a.emitEvent(ctx, events.Event{
926-
Status: events.Status{
927-
SSHLocalPort: a.sshLocalPort,
928-
CloudInitProgress: &events.CloudInitProgress{
929-
Active: !cloudInitFinished,
930-
LogLine: line,
931-
Completed: cloudInitFinished,
932-
},
933-
},
970+
a.emitCloudInitProgressEvent(ctx, &events.CloudInitProgress{
971+
Active: !cloudInitFinished,
972+
LogLine: line,
973+
Completed: cloudInitFinished,
934974
})
935975
}
936976
}
937977
}
938978
}
979+
}
939980

940-
a.emitEvent(ctx, events.Event{
941-
Status: events.Status{
942-
SSHLocalPort: a.sshLocalPort,
943-
CloudInitProgress: &events.CloudInitProgress{
944-
Active: false,
945-
Completed: true,
946-
},
947-
},
948-
})
949-
950-
logrus.Debug("Cloud-init progress monitoring completed")
981+
// isCloudInitFinished reports whether line looks like cloud-init's completion
// message. A line matches when it contains both "cloud-init" and "finished",
// compared case-insensitively; the two words may appear anywhere in the line
// and in either order.
func isCloudInitFinished(line string) bool {
	// Substring checks are unaffected by leading or trailing whitespace, so the
	// line only needs to be lowercased, not trimmed.
	lower := strings.ToLower(line)
	return strings.Contains(lower, "cloud-init") && strings.Contains(lower, "finished")
}
952985

953986
func copyToHost(ctx context.Context, sshConfig *ssh.SSHConfig, port int, local, remote string) error {

pkg/instance/start.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -332,9 +332,9 @@ func watchHostAgentEvents(ctx context.Context, inst *store.Instance, haStdoutPat
332332

333333
if progress.Completed {
334334
cloudInitCompleted = true
335+
logrus.Infof("Cloud-init progress monitoring done.")
335336
}
336337
}
337-
338338
if len(ev.Status.Errors) > 0 {
339339
logrus.Errorf("%+v", ev.Status.Errors)
340340
}
@@ -355,7 +355,6 @@ func watchHostAgentEvents(ctx context.Context, inst *store.Instance, haStdoutPat
355355
}
356356

357357
if showProgress && !cloudInitCompleted {
358-
logrus.Infof("VM is running, waiting for cloud-init to complete...")
359358
return false
360359
}
361360

0 commit comments

Comments
 (0)