44 "context"
55 "fmt"
66 "os"
7+ "strconv"
78 "strings"
89 "sync"
910 "time"
@@ -42,6 +43,56 @@ const (
4243
// NamespaceLabels is a single-entry label map pairing E2ETestLabelsKey with E2ETestLabelsValue.
// NOTE(review): presumably applied to namespaces created by the E2E tests — confirm against callers.
4344var NamespaceLabels = map [string ]string {E2ETestLabelsKey : E2ETestLabelsValue }
4445
46+ // waitForRootPartitionToHaveMinimumDiskSpace:
47+ // - When running via GitHub (GH) action, the E2E test environment has only ~14GiB of disk space available.
48+ // - As the E2E tests run, and as the K8s cluster persists data, that available storage drops over time.
49+ // - This drop is especially significant during parallel test execution, as multiple Argo CD instances are running/logging at the same time.
50+ // - When the availabel disk space drops to 4GiB, the K8s instance will start to arbitrarily evict pods, which causes tests to intermittently fail.
51+ // - As a workaround (since we can't increase the GH action env), each parallel test will wait for a minimum of disk space before starting.
52+ // - Before each parallel test, we thus run `df` command and wait for it to tell use that >= 5GB of disk space is available.
53+ func waitForRootPartitionToHaveMinimumDiskSpace () {
54+
55+ GinkgoWriter .Println ("waitForRootPartitionToHaveMinimumDiskSpace" )
56+
57+ for {
58+
59+ output , err := osFixture .ExecCommandWithOutputParam (true , "df" )
60+ Expect (err ).ToNot (HaveOccurred ())
61+ GinkgoWriter .Println ("JGW------------------" )
62+ var rootEntry string
63+ for line := range strings .SplitSeq (output , "\n " ) {
64+ GinkgoWriter .Println ("-" , line )
65+ if strings .Contains (line , "/dev/root" ) {
66+ rootEntry = line
67+ break
68+ }
69+ }
70+ if rootEntry == "" {
71+ GinkgoWriter .Println ("No /dev/root volume to manage" )
72+ return
73+ }
74+
75+ // Split output by whitespace to parse df fields
76+ fields := strings .Fields (rootEntry )
77+ Expect (len (fields )).To (BeNumerically (">=" , 4 ), "df output should have at least 4 fields" )
78+
79+ // Parse fields[3] which is the available space in bytes
80+ availableBytes , err := strconv .ParseInt (fields [3 ], 10 , 64 )
81+ Expect (err ).ToNot (HaveOccurred (), "failed to parse available bytes from df output" )
82+
83+ // Convert bytes to gigabytes (1 GB = 1024^3 bytes)
84+ availableGB := availableBytes / (1024 * 1024 * 1024 )
85+
86+ // If less than 6 GB available, sleep and continue the loop
87+ if availableGB < 6 {
88+ GinkgoWriter .Println ("Waiting for /dev/root volume to have minimum size, current size:" , availableGB , "GB" )
89+ time .Sleep (time .Second * 10 )
90+ } else {
91+ return
92+ }
93+ }
94+ }
95+
4596func EnsureParallelCleanSlate () {
4697
4798 // Increase the maximum length of debug output, for when tests fail
@@ -51,6 +102,8 @@ func EnsureParallelCleanSlate() {
51102 SetDefaultConsistentlyDuration (time .Second * 10 )
52103 SetDefaultConsistentlyPollingInterval (time .Second * 1 )
53104
105+ waitForRootPartitionToHaveMinimumDiskSpace ()
106+
54107 // Unlike sequential clean slate, parallel clean slate cannot assume that there are no other tests running. This limits our ability to clean up old test artifacts.
55108}
56109
0 commit comments