Skip to content

Commit f339cd8

Browse files
authored
OOM (Out of memory) status (#40)
1 parent 186d8a3 commit f339cd8

File tree

6 files changed

+30
-8
lines changed

6 files changed

+30
-8
lines changed

pkg/api/resource/saved_status.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ const (
4747
ExitCodeDataSucceeded DataExitCode = "succeeded"
4848
ExitCodeDataFailed DataExitCode = "failed"
4949
ExitCodeDataKilled DataExitCode = "killed"
50+
ExitCodeDataOOM DataExitCode = "oom"
5051
)
5152

5253
func DataSavedStatusPtrsEqual(savedStatus *DataSavedStatus, savedStatus2 *DataSavedStatus) bool {

pkg/api/resource/status.go

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,8 @@ const (
109109
StatusAPIGroupParentFailed
110110
StatusAPIGroupParentKilled
111111
StatusAPIGroupUpdateSkipped
112+
113+
StatusDataKilledOOM
112114
)
113115

114116
var statusCodes = []string{
@@ -136,9 +138,11 @@ var statusCodes = []string{
136138
"status_api_group_parent_failed",
137139
"status_api_group_parent_killed",
138140
"status_api_group_update_skipped",
141+
142+
"status_data_oom",
139143
}
140144

141-
var _ = [1]int{}[int(StatusAPIGroupUpdateSkipped)-(len(statusCodes)-1)] // Ensure list length matches
145+
var _ = [1]int{}[int(StatusDataKilledOOM)-(len(statusCodes)-1)] // Ensure list length matches
142146

143147
var statusCodeMessages = []string{
144148
"unknown", // StatusUnknown
@@ -165,9 +169,11 @@ var statusCodeMessages = []string{
165169
"upstream error", // StatusAPIGroupParentFailed
166170
"upstream termination", // StatusAPIGroupParentKilled
167171
"update skipped", // StatusAPIGroupUpdateSkipped
172+
173+
"terminated (out of mem)", // StatusDataKilledOOM
168174
}
169175

170-
var _ = [1]int{}[int(StatusAPIGroupUpdateSkipped)-(len(statusCodeMessages)-1)] // Ensure list length matches
176+
var _ = [1]int{}[int(StatusDataKilledOOM)-(len(statusCodeMessages)-1)] // Ensure list length matches
171177

172178
// StatusDataRunning aliases
173179
const (
@@ -203,9 +209,11 @@ var statusSortBuckets = []int{
203209
2, // StatusAPIGroupParentFailed
204210
2, // StatusAPIGroupParentKilled
205211
2, // StatusAPIGroupUpdateSkipped
212+
213+
1, // StatusDataKilledOOM
206214
}
207215

208-
var _ = [1]int{}[int(StatusAPIGroupUpdateSkipped)-(len(statusSortBuckets)-1)] // Ensure list length matches
216+
var _ = [1]int{}[int(StatusDataKilledOOM)-(len(statusSortBuckets)-1)] // Ensure list length matches
209217

210218
func (code StatusCode) String() string {
211219
if int(code) < 0 || int(code) >= len(statusCodes) {

pkg/operator/k8s/pod.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,14 @@ var podTypeMeta = metav1.TypeMeta{
3333
}
3434

3535
const (
36+
PodStatusUnknown = "Unknown"
3637
PodStatusPending = "Pending"
3738
PodStatusRunning = "Running"
3839
PodStatusTerminating = "Terminating"
3940
PodStatusSucceeded = "Succeeded"
4041
PodStatusFailed = "Failed"
4142
PodStatusKilled = "Killed"
42-
PodStatusUnknown = "Unknown"
43+
PodStatusKilledOOM = "Out of Memory"
4344
)
4445

4546
var killStatuses = map[int32]bool{
@@ -108,11 +109,17 @@ func GetPodStatus(pod *corev1.Pod) string {
108109
for _, containerStatus := range pod.Status.ContainerStatuses {
109110
if containerStatus.LastTerminationState.Terminated != nil {
110111
exitCode := containerStatus.LastTerminationState.Terminated.ExitCode
112+
if exitCode == 137 {
113+
return PodStatusKilledOOM
114+
}
111115
if killStatuses[exitCode] {
112116
return PodStatusKilled
113117
}
114118
} else if containerStatus.State.Terminated != nil {
115119
exitCode := containerStatus.State.Terminated.ExitCode
120+
if exitCode == 137 {
121+
return PodStatusKilledOOM
122+
}
116123
if killStatuses[exitCode] {
117124
return PodStatusKilled
118125
}

pkg/operator/workloads/api_status.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ func updateAPIStatusCodeByParents(apiStatus *resource.APIStatus, dataStatuses ma
194194
parentSkipped := false
195195
for dependency := range allDependencies {
196196
switch dataStatuses[dependency].Code {
197-
case resource.StatusDataKilled:
197+
case resource.StatusDataKilled, resource.StatusDataKilledOOM:
198198
apiStatus.Code = resource.StatusParentKilled
199199
return
200200
case resource.StatusDataFailed:

pkg/operator/workloads/data_status.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ func dataStatusCode(dataSavedStatus *resource.DataSavedStatus) resource.StatusCo
7777
return resource.StatusDataFailed
7878
case resource.ExitCodeDataKilled:
7979
return resource.StatusDataKilled
80+
case resource.ExitCodeDataOOM:
81+
return resource.StatusDataKilledOOM
8082
}
8183

8284
return resource.StatusUnknown
@@ -91,7 +93,7 @@ func updateDataStatusCodeByParents(dataStatus *resource.DataStatus, dataStatuses
9193
parentSkipped := false
9294
for dependency := range allDependencies {
9395
switch dataStatuses[dependency].Code {
94-
case resource.StatusDataKilled:
96+
case resource.StatusDataKilled, resource.StatusDataKilledOOM:
9597
dataStatus.Code = resource.StatusParentKilled
9698
return
9799
case resource.StatusDataFailed:

pkg/operator/workloads/workload_spec.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,13 @@ func UpdateDataWorkflowErrors(failedPods []corev1.Pod) error {
150150
savedStatus.Start = nowTime
151151
}
152152

153-
savedStatus.ExitCode = resource.ExitCodeDataFailed
154-
if k8s.GetPodStatus(&pod) == k8s.PodStatusKilled {
153+
switch k8s.GetPodStatus(&pod) {
154+
case k8s.PodStatusKilled:
155155
savedStatus.ExitCode = resource.ExitCodeDataKilled
156+
case k8s.PodStatusKilledOOM:
157+
savedStatus.ExitCode = resource.ExitCodeDataOOM
158+
default:
159+
savedStatus.ExitCode = resource.ExitCodeDataFailed
156160
}
157161

158162
savedStatusesToUpload = append(savedStatusesToUpload, savedStatus)

0 commit comments

Comments
 (0)