From 03d7854a63fddcf9a3d6df8a4d341e3cab6f0082 Mon Sep 17 00:00:00 2001 From: reggie-k Date: Sun, 14 Sep 2025 12:55:10 +0300 Subject: [PATCH 01/15] debug statements Signed-off-by: reggie-k --- util/exec/exec.go | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/util/exec/exec.go b/util/exec/exec.go index 0df69aeba7f9b..9c9098f812d11 100644 --- a/util/exec/exec.go +++ b/util/exec/exec.go @@ -6,6 +6,7 @@ import ( "fmt" "os" "os/exec" + "path/filepath" "strconv" "strings" "syscall" @@ -183,6 +184,21 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) { args := strings.Join(cmd.Args, " ") logCtx.WithFields(logrus.Fields{"dir": cmd.Dir}).Info(redactor(args)) + // Helper: debug whether HEAD.lock exists under the current working directory + logHeadLockStatus := func(where string) { + if cmd.Dir == "" { + return + } + lockPath := filepath.Join(cmd.Dir, ".git", "HEAD.lock") + _, statErr := os.Stat(lockPath) + exists := statErr == nil + logCtx.WithFields(logrus.Fields{ + "headLockPath": lockPath, + "headLockExists": exists, + "where": where, + }).Info("HEAD.lock status " + execId) + } + var stdout bytes.Buffer var stderr bytes.Buffer cmd.Stdout = &stdout @@ -190,6 +206,7 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) { start := time.Now() err = cmd.Start() + logCtx.Info("*************************************** EXEC COMMAND STARTED *************************************** " + execId) if err != nil { return "", err } @@ -228,13 +245,16 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) { select { // noinspection ALL case <-timoutCh: + logCtx.Info("*************************************** EXEC TIMEOUT HAPPENED *************************************** " + execId) // send timeout signal _ = cmd.Process.Signal(timeoutBehavior.Signal) // wait on timeout signal and fallback to fatal timeout signal if timeoutBehavior.ShouldWait { + logCtx.Info("*************************************** EXEC WAIT HAPPENED *************************************** " + execId) select { case <-done: case <-fatalTimeoutCh: + logCtx.Info("*************************************** EXEC FATAL TIMEOUT HAPPENED *************************************** " + execId) // upgrades to SIGKILL if cmd does not respect SIGTERM _ = cmd.Process.Signal(fatalTimeoutBehaviour) // now original cmd should exit immediately after SIGKILL @@ -245,6 +265,7 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) { output += stderr.String() } logCtx.WithFields(logrus.Fields{"duration": time.Since(start)}).Debug(redactor(output)) + logHeadLockStatus("fatal-timeout") err = newCmdError(redactor(args), fmt.Errorf("fatal timeout after %v", timeout+fatalTimeout), "") logCtx.Error(err.Error()) return strings.TrimSuffix(output, "\n"), err @@ -256,11 +277,14 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) { output += stderr.String() } logCtx.WithFields(logrus.Fields{"duration": time.Since(start)}).Debug(redactor(output)) + logHeadLockStatus("timeout") err = newCmdError(redactor(args), fmt.Errorf("timeout after %v", timeout), "") logCtx.Error(err.Error()) return strings.TrimSuffix(output, "\n"), err case err := <-done: + logCtx.Info("*************************************** FINISHED ON TIME *************************************** " + execId) if err != nil { + logCtx.Error("*************************************** FINISHED ON TIME EXEC FAILED *************************************** " + execId) output := stdout.String() if opts.CaptureStderr { output += stderr.String() @@ -270,6 +294,7 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) { if !opts.SkipErrorLogging { logCtx.Error(err.Error()) } + logHeadLockStatus("done-error") return strings.TrimSuffix(output, "\n"), err } } @@ -278,6 +303,7 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) { output += stderr.String() } logCtx.WithFields(logrus.Fields{"duration": time.Since(start)}).Debug(redactor(output)) + logHeadLockStatus("done-success") return strings.TrimSuffix(output, "\n"), nil } From 298470511369c1d2e7cc6c4c9e297dbdd6a73dbd Mon Sep 17 00:00:00 2001 From: reggie-k Date: Sun, 14 Sep 2025 13:32:45 +0300 Subject: [PATCH 02/15] debug statements Signed-off-by: reggie-k --- util/exec/exec.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/util/exec/exec.go b/util/exec/exec.go index 9c9098f812d11..b4f74d2e1251c 100644 --- a/util/exec/exec.go +++ b/util/exec/exec.go @@ -196,7 +196,7 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) { "headLockPath": lockPath, "headLockExists": exists, "where": where, - }).Info("HEAD.lock status " + execId) + }).Info("HEAD.lock status execId=" + execId) } var stdout bytes.Buffer @@ -206,7 +206,6 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) { start := time.Now() err = cmd.Start() - logCtx.Info("*************************************** EXEC COMMAND STARTED *************************************** " + execId) if err != nil { return "", err } @@ -245,16 +244,13 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) { select { // noinspection ALL case <-timoutCh: - logCtx.Info("*************************************** EXEC TIMEOUT HAPPENED *************************************** " + execId) // send timeout signal _ = cmd.Process.Signal(timeoutBehavior.Signal) // wait on timeout signal and fallback to fatal timeout signal if timeoutBehavior.ShouldWait { - logCtx.Info("*************************************** EXEC WAIT HAPPENED *************************************** " + execId) select { case <-done: case <-fatalTimeoutCh: - logCtx.Info("*************************************** EXEC FATAL TIMEOUT HAPPENED *************************************** " + execId) // upgrades to SIGKILL if cmd does not respect SIGTERM _ = cmd.Process.Signal(fatalTimeoutBehaviour) // now original cmd should exit immediately after SIGKILL @@ -282,9 +278,7 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) { logCtx.Error(err.Error()) return strings.TrimSuffix(output, "\n"), err case err := <-done: - logCtx.Info("*************************************** FINISHED ON TIME *************************************** " + execId) if err != nil { - logCtx.Error("*************************************** FINISHED ON TIME EXEC FAILED *************************************** " + execId) output := stdout.String() if opts.CaptureStderr { output += stderr.String() From 9c3f393a7bcb8d8c96c63548b1bcf765fb104d40 Mon Sep 17 00:00:00 2001 From: reggie-k Date: Mon, 15 Sep 2025 11:38:17 +0300 Subject: [PATCH 03/15] debug statements Signed-off-by: reggie-k --- util/exec/exec.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/util/exec/exec.go b/util/exec/exec.go index b4f74d2e1251c..2a9b8da7edf10 100644 --- a/util/exec/exec.go +++ b/util/exec/exec.go @@ -210,6 +210,8 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) { return "", err } + logHeadLockStatus("start exec") + done := make(chan error) go func() { done <- cmd.Wait() }() From 226ccc57530c9f1b3ac604844789065dcc007341 Mon Sep 17 00:00:00 2001 From: reggie-k Date: Mon, 15 Sep 2025 12:32:12 +0300 Subject: [PATCH 04/15] debug statements Signed-off-by: reggie-k --- util/exec/exec.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/exec/exec.go b/util/exec/exec.go index 2a9b8da7edf10..32bc292123c1c 100644 --- a/util/exec/exec.go +++ b/util/exec/exec.go @@ -210,7 +210,7 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) { return "", err } - logHeadLockStatus("start exec") + logHeadLockStatus("start-exec") done := make(chan error) go func() { done <- cmd.Wait() }() From afb4476158e901ea22977c3a7553d1dc08c8c933 Mon Sep 17 00:00:00 2001 From: reggie-k Date: Mon, 15 Sep 2025 14:17:05 +0300 Subject: [PATCH 05/15] handle lock file deletion before checkoputRevision Signed-off-by: reggie-k --- reposerver/repository/repository.go | 18 ++++++++++++++++ reposerver/repository/repository_test.go | 26 ++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/reposerver/repository/repository.go b/reposerver/repository/repository.go index 1abccf6bcd2dd..65d6aec3cc46c 100644 --- a/reposerver/repository/repository.go +++ b/reposerver/repository/repository.go @@ -2533,7 +2533,25 @@ func fetch(gitClient git.Client, targetRevisions []string) error { return nil } +// removeStaleGitLocks best-effort removes common stale git lock files in the repository. +// It is safe to call while holding the per-repo lock and before running any git commands. + +func removeStaleGitLocks(repoRoot string) { + path := filepath.Join(repoRoot, ".git", "HEAD.lock") + log.Infof("Checking whether HEAD.lock exists %s", path) + if _, err := os.Stat(path); err == nil { + log.Warnf("HEAD.lock present in git repository %s, removing it", repoRoot) + if rmErr := os.Remove(path); rmErr == nil { + log.Infof("Removed stale git lock %s", path) + } else if !os.IsNotExist(rmErr) { + log.Warnf("Failed to remove git lock %s: %v", path, rmErr) + } + } +} + func checkoutRevision(gitClient git.Client, revision string, submoduleEnabled bool) error { + removeStaleGitLocks(gitClient.Root()) + err := gitClient.Init() if err != nil { return status.Errorf(codes.Internal, "Failed to initialize git repo: %v", err) diff --git a/reposerver/repository/repository_test.go b/reposerver/repository/repository_test.go index e96fc3540a95e..d2b6856bcc49f 100644 --- a/reposerver/repository/repository_test.go +++ b/reposerver/repository/repository_test.go @@ -3016,6 +3016,29 @@ func TestInit(t *testing.T) { initGitRepo(t, newGitRepoOptions{path: path.Join(dir, "repo2"), remote: "https://github.com/argo-cd/test-repo2", createPath: true, addEmptyCommit: false}) } +func TestRemoveStaleGitHeadLock(t *testing.T) { + dir := t.TempDir() + // create a fake repo structure with .git/HEAD.lock + gitDir := path.Join(dir, ".git") + require.NoError(t, os.MkdirAll(gitDir, 0o755)) + headLock := path.Join(gitDir, "HEAD.lock") + require.NoError(t, os.WriteFile(headLock, []byte("test"), 0o644)) + + // sanity: lock exists before + _, err := os.Stat(headLock) + require.NoError(t, err) + + removeStaleGitLocks(dir) + + // lock should be gone after cleanup + _, err = os.Stat(headLock) + require.Error(t, err) + require.True(t, os.IsNotExist(err)) + + // calling again should be a no-op (no error) + removeStaleGitLocks(dir) +} + // TestCheckoutRevisionCanGetNonstandardRefs shows that we can fetch a revision that points to a non-standard ref. In // other words, we haven't regressed and caused this issue again: https://github.com/argoproj/argo-cd/issues/4935 func TestCheckoutRevisionCanGetNonstandardRefs(t *testing.T) { @@ -3057,6 +3080,7 @@ func TestCheckoutRevisionPresentSkipFetch(t *testing.T) { gitClient := &gitmocks.Client{} gitClient.On("Init").Return(nil) + gitClient.On("Root").Return("") gitClient.On("IsRevisionPresent", revision).Return(true) gitClient.On("Checkout", revision, mock.Anything).Return("", nil) @@ -3069,6 +3093,7 @@ func TestCheckoutRevisionNotPresentCallFetch(t *testing.T) { gitClient := &gitmocks.Client{} gitClient.On("Init").Return(nil) + gitClient.On("Root").Return("") gitClient.On("IsRevisionPresent", revision).Return(false) gitClient.On("Fetch", "").Return(nil) gitClient.On("Checkout", revision, mock.Anything).Return("", nil) @@ -3083,6 +3108,7 @@ func TestFetch(t *testing.T) { gitClient := &gitmocks.Client{} gitClient.On("Init").Return(nil) + gitClient.On("Root").Return("") gitClient.On("IsRevisionPresent", revision1).Once().Return(true) gitClient.On("IsRevisionPresent", revision2).Once().Return(false) gitClient.On("Fetch", "").Return(nil) From 7050d0bd7e350f36754347ffd8f77370f265c5b2 Mon Sep 17 00:00:00 2001 From: reggie-k Date: Mon, 15 Sep 2025 14:30:01 +0300 Subject: [PATCH 06/15] handle lock file deletion before checkoputRevision Signed-off-by: reggie-k --- reposerver/repository/repository.go | 10 ++++------ reposerver/repository/repository_test.go | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/reposerver/repository/repository.go b/reposerver/repository/repository.go index 65d6aec3cc46c..d861f9de5b33e 100644 --- a/reposerver/repository/repository.go +++ b/reposerver/repository/repository.go @@ -2536,21 +2536,19 @@ func fetch(gitClient git.Client, targetRevisions []string) error { // removeStaleGitLocks best-effort removes common stale git lock files in the repository. // It is safe to call while holding the per-repo lock and before running any git commands. -func removeStaleGitLocks(repoRoot string) { +func removeStaleGitLock(repoRoot string) { path := filepath.Join(repoRoot, ".git", "HEAD.lock") log.Infof("Checking whether HEAD.lock exists %s", path) if _, err := os.Stat(path); err == nil { log.Warnf("HEAD.lock present in git repository %s, removing it", repoRoot) - if rmErr := os.Remove(path); rmErr == nil { - log.Infof("Removed stale git lock %s", path) - } else if !os.IsNotExist(rmErr) { - log.Warnf("Failed to remove git lock %s: %v", path, rmErr) + if rmErr := os.Remove(path); rmErr != nil { + log.Errorf("Failed to remove git lock %s: %v", path, rmErr) } } } func checkoutRevision(gitClient git.Client, revision string, submoduleEnabled bool) error { - removeStaleGitLocks(gitClient.Root()) + removeStaleGitLock(gitClient.Root()) err := gitClient.Init() if err != nil { diff --git a/reposerver/repository/repository_test.go b/reposerver/repository/repository_test.go index d2b6856bcc49f..13188561c9f5e 100644 --- a/reposerver/repository/repository_test.go +++ b/reposerver/repository/repository_test.go @@ -3028,7 +3028,7 @@ func TestRemoveStaleGitHeadLock(t *testing.T) { _, err := os.Stat(headLock) require.NoError(t, err) - removeStaleGitLocks(dir) + removeStaleGitLock(dir) // lock should be gone after cleanup _, err = os.Stat(headLock) @@ -3036,7 +3036,7 @@ func TestRemoveStaleGitHeadLock(t *testing.T) { require.True(t, os.IsNotExist(err)) // calling again should be a no-op (no error) - removeStaleGitLocks(dir) + removeStaleGitLock(dir) } // TestCheckoutRevisionCanGetNonstandardRefs shows that we can fetch a revision that points to a non-standard ref. In From af392b7bc2daa93ea4f6ba1228c0275be0dd2f26 Mon Sep 17 00:00:00 2001 From: reggie-k Date: Mon, 15 Sep 2025 16:39:53 +0300 Subject: [PATCH 07/15] handle lock file deletion before checkoputRevision Signed-off-by: reggie-k --- util/exec/exec.go | 17 ++++++++++++++--- util/exec/exec_unix.go | 13 +++++++++++++ util/exec/exec_windows.go | 10 ++++++++++ 3 files changed, 37 insertions(+), 3 deletions(-) create mode 100644 util/exec/exec_unix.go create mode 100644 util/exec/exec_windows.go diff --git a/util/exec/exec.go b/util/exec/exec.go index 32bc292123c1c..260013cea51d4 100644 --- a/util/exec/exec.go +++ b/util/exec/exec.go @@ -204,6 +204,12 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) { cmd.Stdout = &stdout cmd.Stderr = &stderr + // Configure the child to run in its own process group so we can signal the whole group on timeout/cancel. + // On Unix this sets Setpgid; on Windows this is a no-op. + if cmd.SysProcAttr == nil { + cmd.SysProcAttr = newSysProcAttr(true) + } + start := time.Now() err = cmd.Start() if err != nil { @@ -247,14 +253,19 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) { // noinspection ALL case <-timoutCh: // send timeout signal - _ = cmd.Process.Signal(timeoutBehavior.Signal) + // signal the process group (negative PID) so children are terminated as well + if cmd.Process != nil { + _ = sysCallSignal(-cmd.Process.Pid, timeoutBehavior.Signal) + } // wait on timeout signal and fallback to fatal timeout signal if timeoutBehavior.ShouldWait { select { case <-done: case <-fatalTimeoutCh: - // upgrades to SIGKILL if cmd does not respect SIGTERM - _ = cmd.Process.Signal(fatalTimeoutBehaviour) + // upgrades to fatal signal (default SIGKILL) if cmd does not respect the initial signal + if cmd.Process != nil { + _ = sysCallSignal(-cmd.Process.Pid, fatalTimeoutBehaviour) + } // now original cmd should exit immediately after SIGKILL <-done // return error with a marker indicating that cmd exited only after fatal SIGKILL diff --git a/util/exec/exec_unix.go b/util/exec/exec_unix.go new file mode 100644 index 0000000000000..648a633e60e6e --- /dev/null +++ b/util/exec/exec_unix.go @@ -0,0 +1,13 @@ +//go:build !windows +// +build !windows + +package exec + +import "syscall" + +func newSysProcAttr(setpgid bool) *syscall.SysProcAttr { + return &syscall.SysProcAttr{Setpgid: setpgid} +} +func sysCallSignal(pid int, sig syscall.Signal) error { + return syscall.Kill(pid, sig) +} diff --git a/util/exec/exec_windows.go b/util/exec/exec_windows.go new file mode 100644 index 0000000000000..229ca37f31a5a --- /dev/null +++ b/util/exec/exec_windows.go @@ -0,0 +1,10 @@ +//go:build windows +// +build windows + +package exec + +import "syscall" + +func newSysProcAttr(_ bool) *syscall.SysProcAttr { return &syscall.SysProcAttr{} } + +func sysCallSignal(_ int, _ syscall.Signal) error { return nil } From 54dafb28aa2e7297d8668a933ca01a63bac9f617 Mon Sep 17 00:00:00 2001 From: reggie-k Date: Mon, 15 Sep 2025 16:51:19 +0300 Subject: [PATCH 08/15] start the process with it's own group and send signals to the entire process group Signed-off-by: reggie-k --- util/exec/exec_unix.go | 1 + 1 file changed, 1 insertion(+) diff --git a/util/exec/exec_unix.go b/util/exec/exec_unix.go index 648a633e60e6e..dd6614ed7c375 100644 --- a/util/exec/exec_unix.go +++ b/util/exec/exec_unix.go @@ -8,6 +8,7 @@ import "syscall" func newSysProcAttr(setpgid bool) *syscall.SysProcAttr { return &syscall.SysProcAttr{Setpgid: setpgid} } + func sysCallSignal(pid int, sig syscall.Signal) error { return syscall.Kill(pid, sig) } From d5e06bbd1bef9e224e7dd7488805a78f2c884f95 Mon Sep 17 00:00:00 2001 From: reggie-k Date: Tue, 16 Sep 2025 15:41:31 +0300 Subject: [PATCH 09/15] added unit test for lock file absense upon signaling the process group Signed-off-by: reggie-k --- util/exec/exec_test.go | 50 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/util/exec/exec_test.go b/util/exec/exec_test.go index 6fc284aaa129f..5eb8159b77f14 100644 --- a/util/exec/exec_test.go +++ b/util/exec/exec_test.go @@ -1,7 +1,9 @@ package exec import ( + "os" "os/exec" + "path/filepath" "regexp" "syscall" "testing" @@ -215,3 +217,51 @@ func TestRunCaptureStderr(t *testing.T) { assert.Equal(t, "hello world\nmy-error", output) assert.NoError(t, err) } + +// This test demonstrates that when a process group is signaled, all child processes are also terminated and file locks released. +func TestProcessGroupSignalRemovesChildLock(t *testing.T) { + hook := test.NewGlobal() + log.SetLevel(log.DebugLevel) + defer log.SetLevel(log.InfoLevel) + + dir := t.TempDir() + lockFile := filepath.Join(dir, "lockfile") + childScript := filepath.Join(dir, "child.sh") + parentScript := filepath.Join(dir, "parent.sh") + + // Child: create lock file; on SIGTERM remove it and exit + child := "#!/bin/sh\n" + + "trap 'rm -f lockfile; exit 0' TERM\n" + + "touch lockfile\n" + + "sleep 100\n" + require.NoError(t, os.WriteFile(childScript, []byte(child), 0o755)) + + // Parent: start child in background and sleep + parent := "#!/bin/sh\n" + + "./child.sh &\n" + + "sleep 100\n" + require.NoError(t, os.WriteFile(parentScript, []byte(parent), 0o755)) + + // Run parent with a short timeout; our implementation signals the process group + opts := CmdOpts{ + Timeout: 500 * time.Millisecond, + FatalTimeout: 500 * time.Millisecond, + TimeoutBehavior: TimeoutBehavior{ + Signal: syscall.SIGTERM, + ShouldWait: true, + }, + } + _, err := RunCommand("sh", opts, "-c", "cd "+dir+" && ./parent.sh") + require.Error(t, err) + + // Give a bit of time for traps to run and for the process tree to settle + time.Sleep(200 * time.Millisecond) + + // Because the process group was signaled, the child should have removed the lock + _, statErr := os.Stat(lockFile) + require.Error(t, statErr, "expected lock file to be removed when process group is signaled") + assert.True(t, os.IsNotExist(statErr)) + + // basic sanity: logs produced + require.GreaterOrEqual(t, len(hook.Entries), 1) +} From 1af53d1d31ab2009385c1bce51382bfecb137dfb Mon Sep 17 00:00:00 2001 From: reggie-k Date: Wed, 17 Sep 2025 13:12:14 +0300 Subject: [PATCH 10/15] more logging of processes in process group if the lock file is found Signed-off-by: reggie-k --- reposerver/repository/repository.go | 22 +++++----- reposerver/repository/repository_test.go | 44 +++++++++---------- .../.argocd-helm-dep-up | 1 + util/exec/exec.go | 25 +++++++++-- 4 files changed, 56 insertions(+), 36 deletions(-) create mode 100644 reposerver/repository/testdata/helm-with-local-dependency/.argocd-helm-dep-up diff --git a/reposerver/repository/repository.go b/reposerver/repository/repository.go index d861f9de5b33e..950a307789bb0 100644 --- a/reposerver/repository/repository.go +++ b/reposerver/repository/repository.go @@ -2536,19 +2536,19 @@ func fetch(gitClient git.Client, targetRevisions []string) error { // removeStaleGitLocks best-effort removes common stale git lock files in the repository. // It is safe to call while holding the per-repo lock and before running any git commands. -func removeStaleGitLock(repoRoot string) { - path := filepath.Join(repoRoot, ".git", "HEAD.lock") - log.Infof("Checking whether HEAD.lock exists %s", path) - if _, err := os.Stat(path); err == nil { - log.Warnf("HEAD.lock present in git repository %s, removing it", repoRoot) - if rmErr := os.Remove(path); rmErr != nil { - log.Errorf("Failed to remove git lock %s: %v", path, rmErr) - } - } -} +// func removeStaleGitLock(repoRoot string) { +// path := filepath.Join(repoRoot, ".git", "HEAD.lock") +// log.Infof("Checking whether HEAD.lock exists %s", path) +// if _, err := os.Stat(path); err == nil { +// log.Warnf("HEAD.lock present in git repository %s, removing it", repoRoot) +// if rmErr := os.Remove(path); rmErr != nil { +// log.Errorf("Failed to remove git lock %s: %v", path, rmErr) +// } +// } +// } func checkoutRevision(gitClient git.Client, revision string, submoduleEnabled bool) error { - removeStaleGitLock(gitClient.Root()) + // removeStaleGitLock(gitClient.Root()) err := gitClient.Init() if err != nil { diff --git a/reposerver/repository/repository_test.go b/reposerver/repository/repository_test.go index 13188561c9f5e..006d2cef3f508 100644 --- a/reposerver/repository/repository_test.go +++ b/reposerver/repository/repository_test.go @@ -3016,28 +3016,28 @@ func TestInit(t *testing.T) { initGitRepo(t, newGitRepoOptions{path: path.Join(dir, "repo2"), remote: "https://github.com/argo-cd/test-repo2", createPath: true, addEmptyCommit: false}) } -func TestRemoveStaleGitHeadLock(t *testing.T) { - dir := t.TempDir() - // create a fake repo structure with .git/HEAD.lock - gitDir := path.Join(dir, ".git") - require.NoError(t, os.MkdirAll(gitDir, 0o755)) - headLock := path.Join(gitDir, "HEAD.lock") - require.NoError(t, os.WriteFile(headLock, []byte("test"), 0o644)) - - // sanity: lock exists before - _, err := os.Stat(headLock) - require.NoError(t, err) +// func TestRemoveStaleGitHeadLock(t *testing.T) { +// dir := t.TempDir() +// // create a fake repo structure with .git/HEAD.lock +// gitDir := path.Join(dir, ".git") +// require.NoError(t, os.MkdirAll(gitDir, 0o755)) +// headLock := path.Join(gitDir, "HEAD.lock") +// require.NoError(t, os.WriteFile(headLock, []byte("test"), 0o644)) - removeStaleGitLock(dir) +// // sanity: lock exists before +// _, err := os.Stat(headLock) +// require.NoError(t, err) - // lock should be gone after cleanup - _, err = os.Stat(headLock) - require.Error(t, err) - require.True(t, os.IsNotExist(err)) +// removeStaleGitLock(dir) - // calling again should be a no-op (no error) - removeStaleGitLock(dir) -} +// // lock should be gone after cleanup +// _, err = os.Stat(headLock) +// require.Error(t, err) +// require.True(t, os.IsNotExist(err)) + +// // calling again should be a no-op (no error) +// removeStaleGitLock(dir) +// } // TestCheckoutRevisionCanGetNonstandardRefs shows that we can fetch a revision that points to a non-standard ref. In // other words, we haven't regressed and caused this issue again: https://github.com/argoproj/argo-cd/issues/4935 @@ -3080,7 +3080,7 @@ func TestCheckoutRevisionPresentSkipFetch(t *testing.T) { gitClient := &gitmocks.Client{} gitClient.On("Init").Return(nil) - gitClient.On("Root").Return("") + // gitClient.On("Root").Return("") gitClient.On("IsRevisionPresent", revision).Return(true) gitClient.On("Checkout", revision, mock.Anything).Return("", nil) @@ -3093,7 +3093,7 @@ func TestCheckoutRevisionNotPresentCallFetch(t *testing.T) { gitClient := &gitmocks.Client{} gitClient.On("Init").Return(nil) - gitClient.On("Root").Return("") + // gitClient.On("Root").Return("") gitClient.On("IsRevisionPresent", revision).Return(false) gitClient.On("Fetch", "").Return(nil) gitClient.On("Checkout", revision, mock.Anything).Return("", nil) @@ -3108,7 +3108,7 @@ func TestFetch(t *testing.T) { gitClient := &gitmocks.Client{} gitClient.On("Init").Return(nil) - gitClient.On("Root").Return("") + // gitClient.On("Root").Return("") gitClient.On("IsRevisionPresent", revision1).Once().Return(true) gitClient.On("IsRevisionPresent", revision2).Once().Return(false) gitClient.On("Fetch", "").Return(nil) diff --git a/reposerver/repository/testdata/helm-with-local-dependency/.argocd-helm-dep-up b/reposerver/repository/testdata/helm-with-local-dependency/.argocd-helm-dep-up new file mode 100644 index 0000000000000..ae5cd50af9a31 --- /dev/null +++ b/reposerver/repository/testdata/helm-with-local-dependency/.argocd-helm-dep-up @@ -0,0 +1 @@ +marker \ No newline at end of file diff --git a/util/exec/exec.go b/util/exec/exec.go index 260013cea51d4..68c72fd3ca143 100644 --- a/util/exec/exec.go +++ b/util/exec/exec.go @@ -190,13 +190,31 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) { return } lockPath := filepath.Join(cmd.Dir, ".git", "HEAD.lock") - _, statErr := os.Stat(lockPath) + fileInfo, statErr := os.Stat(lockPath) exists := statErr == nil - logCtx.WithFields(logrus.Fields{ + fields := logrus.Fields{ "headLockPath": lockPath, "headLockExists": exists, "where": where, - }).Info("HEAD.lock status execId=" + execId) + } + if exists { + fields["headLockSize"] = fileInfo.Size() + fields["headLockMode"] = fileInfo.Mode().String() + fields["headLockModTime"] = fileInfo.ModTime() + fields["headLockIsDir"] = fileInfo.IsDir() + + pgid, pgErr := syscall.Getpgid(cmd.Process.Pid) + if pgErr == nil && pgid > 0 { + out, _ := exec.Command( + "ps", + "-o", "pid,ppid,pgid,etime,comm,args", + "--no-headers", + "--pgroup", strconv.Itoa(pgid), + ).CombinedOutput() + fields["processesInGroup"] = strings.TrimSpace(string(out)) + } + } + logCtx.WithFields(fields).Info("HEAD.lock status") } var stdout bytes.Buffer @@ -261,6 +279,7 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) { if timeoutBehavior.ShouldWait { select { case <-done: + logHeadLockStatus("timeout-waited-done") case <-fatalTimeoutCh: // upgrades to fatal signal (default SIGKILL) if cmd does not respect the initial signal if cmd.Process != nil { From 7023f5907fb53dbc42ea90fc609795c880c5fe4d Mon Sep 17 00:00:00 2001 From: reggie-k Date: Wed, 17 Sep 2025 13:12:31 +0300 Subject: [PATCH 11/15] more logging of processes in process group if the lock file is found Signed-off-by: reggie-k --- .../testdata/helm-with-local-dependency/.argocd-helm-dep-up | 1 - 1 file changed, 1 deletion(-) delete mode 100644 reposerver/repository/testdata/helm-with-local-dependency/.argocd-helm-dep-up diff --git a/reposerver/repository/testdata/helm-with-local-dependency/.argocd-helm-dep-up b/reposerver/repository/testdata/helm-with-local-dependency/.argocd-helm-dep-up deleted file mode 100644 index ae5cd50af9a31..0000000000000 --- a/reposerver/repository/testdata/helm-with-local-dependency/.argocd-helm-dep-up +++ /dev/null @@ -1 +0,0 @@ -marker \ No newline at end of file From 482570b2f0f78661a76d3819e2a92cbaf0a87304 Mon Sep 17 00:00:00 2001 From: reggie-k Date: Thu, 18 Sep 2025 12:05:36 +0300 Subject: [PATCH 12/15] deletion of HEAD.lock file upon finish of RunCommandExt Signed-off-by: reggie-k --- util/exec/exec.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/util/exec/exec.go b/util/exec/exec.go index 68c72fd3ca143..faa216ab4616a 100644 --- a/util/exec/exec.go +++ b/util/exec/exec.go @@ -184,6 +184,21 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) { args := strings.Join(cmd.Args, " ") logCtx.WithFields(logrus.Fields{"dir": cmd.Dir}).Info(redactor(args)) + // Best-effort cleanup of a stale HEAD.lock after the command finishes. + defer func() { + if cmd.Dir == "" { + return + } + lockPath := filepath.Join(cmd.Dir, ".git", "HEAD.lock") + if _, err := os.Stat(lockPath); err == nil { + // Log and attempt removal; ignore ENOENT races + logCtx.WithFields(logrus.Fields{"headLockPath": lockPath}).Warn("HEAD.lock present post-exec, removing it") + if rmErr := os.Remove(lockPath); rmErr != nil && !os.IsNotExist(rmErr) { + logCtx.WithFields(logrus.Fields{"headLockPath": lockPath}).Warnf("Failed to remove HEAD.lock: %v", rmErr) + } + } + }() + // Helper: debug whether HEAD.lock exists under the current working directory logHeadLockStatus := func(where string) { if cmd.Dir == "" { From 31f5e12a099fe07cb6980aafbfd3775d4a3a7d52 Mon Sep 17 00:00:00 2001 From: reggie-k Date: Thu, 18 Sep 2025 17:53:04 +0300 Subject: [PATCH 13/15] print path upon deferred func Signed-off-by: reggie-k --- util/exec/exec.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/util/exec/exec.go b/util/exec/exec.go index faa216ab4616a..55231332a3a57 100644 --- a/util/exec/exec.go +++ b/util/exec/exec.go @@ -186,16 +186,22 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) { // Best-effort cleanup of a stale HEAD.lock after the command finishes. defer func() { + if cmd.Dir == "" { return } lockPath := filepath.Join(cmd.Dir, ".git", "HEAD.lock") + logCtx.WithFields(logrus.Fields{"headLockPath": lockPath}).Info("Checking HEAD.lock presense post-exec") if _, err := os.Stat(lockPath); err == nil { // Log and attempt removal; ignore ENOENT races logCtx.WithFields(logrus.Fields{"headLockPath": lockPath}).Warn("HEAD.lock present post-exec, removing it") if rmErr := os.Remove(lockPath); rmErr != nil && !os.IsNotExist(rmErr) { logCtx.WithFields(logrus.Fields{"headLockPath": lockPath}).Warnf("Failed to remove HEAD.lock: %v", rmErr) } + } else if !os.IsNotExist(err) { + logCtx.WithFields(logrus.Fields{"headLockPath": lockPath}).Warnf("Failed to stat HEAD.lock: %v", err) + } else { + logCtx.WithFields(logrus.Fields{"headLockPath": lockPath}).Info("HEAD.lock not present post-exec") } }() From cfd9b8a45d6ea2b154cc9781aabca17358b14e5d Mon Sep 17 00:00:00 2001 From: reggie-k Date: Thu, 18 Sep 2025 18:37:10 +0300 Subject: [PATCH 14/15] hopefully rolling back the cmd debug --- util/exec/exec.go | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/util/exec/exec.go b/util/exec/exec.go index 55231332a3a57..eebb8141017eb 100644 --- a/util/exec/exec.go +++ b/util/exec/exec.go @@ -186,12 +186,11 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) { // Best-effort cleanup of a stale HEAD.lock after the command finishes. defer func() { - if cmd.Dir == "" { return } lockPath := filepath.Join(cmd.Dir, ".git", "HEAD.lock") - logCtx.WithFields(logrus.Fields{"headLockPath": lockPath}).Info("Checking HEAD.lock presense post-exec") + logCtx.WithFields(logrus.Fields{"headLockPath": lockPath}).Info("Checking HEAD.lock presence post-exec") if _, err := os.Stat(lockPath); err == nil { // Log and attempt removal; ignore ENOENT races logCtx.WithFields(logrus.Fields{"headLockPath": lockPath}).Warn("HEAD.lock present post-exec, removing it") @@ -218,22 +217,42 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) { "headLockExists": exists, "where": where, } + + pgid, pgErr := syscall.Getpgid(cmd.Process.Pid) + if pgErr == nil && pgid > 0 { + // Portable ps: list all processes, print needed columns without headers, then filter by PGID in Go. + out, _ := exec.Command( + "ps", + "-ax", + "-o", "pid=,ppid=,pgid=,etime=,comm=,args=", + ).Output() + if len(out) > 0 { + var b strings.Builder + want := strconv.Itoa(pgid) + for _, line := range strings.Split(string(out), "\n") { + line = strings.TrimSpace(line) + if line == "" { + continue + } + fieldsSlice := strings.Fields(line) + if len(fieldsSlice) < 3 { + continue + } + if fieldsSlice[2] == want { + b.WriteString(line) + b.WriteByte('\n') + } + } + fields["gitProcsInGroup"] = strings.TrimSpace(b.String()) + } + } + if exists { fields["headLockSize"] = fileInfo.Size() fields["headLockMode"] = fileInfo.Mode().String() fields["headLockModTime"] = fileInfo.ModTime() fields["headLockIsDir"] = fileInfo.IsDir() - pgid, pgErr := syscall.Getpgid(cmd.Process.Pid) - if pgErr == nil && pgid > 0 { - out, _ := exec.Command( - "ps", - "-o", "pid,ppid,pgid,etime,comm,args", - "--no-headers", - "--pgroup", strconv.Itoa(pgid), - ).CombinedOutput() - fields["processesInGroup"] = strings.TrimSpace(string(out)) - } } logCtx.WithFields(fields).Info("HEAD.lock status") } From d7603b7fec4b1971fe0bc11bce6125ae85eee047 Mon Sep 17 00:00:00 2001 From: reggie-k Date: Thu, 18 Sep 2025 19:02:14 +0300 Subject: [PATCH 15/15] print path upon deferred func and print the process group anyway linting fixed Signed-off-by: reggie-k --- util/exec/exec.go | 1 - 1 file changed, 1 deletion(-) diff --git a/util/exec/exec.go b/util/exec/exec.go index eebb8141017eb..024191aaae2b0 100644 --- a/util/exec/exec.go +++ b/util/exec/exec.go @@ -256,7 +256,6 @@ func RunCommandExt(cmd *exec.Cmd, opts CmdOpts) (string, error) { } logCtx.WithFields(fields).Info("HEAD.lock status") } - var stdout bytes.Buffer var stderr bytes.Buffer cmd.Stdout = &stdout