Skip to content

Commit 45087c7

Browse files
committed
Merge branch 'master' into deployment-phase
2 parents 127b6b7 + 980eb95 commit 45087c7

File tree

4 files changed

+69
-0
lines changed

4 files changed

+69
-0
lines changed

pkg/apis/deployment/v1alpha/conditions.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ const (
3737
ConditionTypeTerminated ConditionType = "Terminated"
3838
// ConditionTypeAutoUpgrade indicates that the member has to be started with `--database.auto-upgrade` once.
3939
ConditionTypeAutoUpgrade ConditionType = "AutoUpgrade"
40+
// ConditionTypePodSchedulingFailure indicates that one or more pods belonging to the deployment cannot be scheduled.
41+
ConditionTypePodSchedulingFailure ConditionType = "PodSchedulingFailure"
4042
// ConditionTypeSecretsChanged indicates that the value of one or more secrets used by
4143
// the deployment have changed. Once that is the case, the operator will no longer
4244
// touch the deployment, until the original secrets have been restored.

pkg/deployment/resources/pod_inspector.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323
package resources
2424

2525
import (
26+
"fmt"
27+
"time"
28+
2629
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
2730
"k8s.io/api/core/v1"
2831

@@ -34,6 +37,10 @@ var (
3437
inspectedPodCounter = metrics.MustRegisterCounter("deployment", "inspected_pods", "Number of pod inspections")
3538
)
3639

40+
const (
41+
podScheduleTimeout = time.Minute // How long we allow the scheduler to take to schedule a pod.
42+
)
43+
3744
// InspectPods lists all pods that belong to the given deployment and updates
3845
// the member status of the deployment accordingly.
3946
func (r *Resources) InspectPods() error {
@@ -49,6 +56,8 @@ func (r *Resources) InspectPods() error {
4956
// Update member status from all pods found
5057
status := r.context.GetStatus()
5158
apiObject := r.context.GetAPIObject()
59+
var podNamesWithScheduleTimeout []string
60+
var unscheduledPodNames []string
5261
for _, p := range pods {
5362
if k8sutil.IsArangoDBImageIDAndVersionPod(p) {
5463
// Image ID pods are not relevant to inspect here
@@ -93,6 +102,13 @@ func (r *Resources) InspectPods() error {
93102
updateMemberStatusNeeded = true
94103
}
95104
}
105+
if k8sutil.IsPodNotScheduledFor(&p, podScheduleTimeout) {
106+
// Pod could not be scheduled for too long
107+
log.Debug().Str("pod-name", p.GetName()).Msg("Pod scheduling timeout")
108+
podNamesWithScheduleTimeout = append(podNamesWithScheduleTimeout, p.GetName())
109+
} else if !k8sutil.IsPodScheduled(&p) {
110+
unscheduledPodNames = append(unscheduledPodNames, p.GetName())
111+
}
96112
if updateMemberStatusNeeded {
97113
if err := status.Members.UpdateMemberStatus(memberStatus, group); err != nil {
98114
return maskAny(err)
@@ -146,6 +162,22 @@ func (r *Resources) InspectPods() error {
146162
allMembersReady := status.Members.AllMembersReady()
147163
status.Conditions.Update(api.ConditionTypeReady, allMembersReady, "", "")
148164

165+
// Update conditions
166+
if len(podNamesWithScheduleTimeout) > 0 {
167+
if status.Conditions.Update(api.ConditionTypePodSchedulingFailure, true,
168+
"Pods Scheduling Timeout",
169+
fmt.Sprintf("The following pods cannot be scheduled: %v", podNamesWithScheduleTimeout)) {
170+
r.context.CreateEvent(k8sutil.NewPodsSchedulingFailureEvent(podNamesWithScheduleTimeout, r.context.GetAPIObject()))
171+
}
172+
} else if status.Conditions.IsTrue(api.ConditionTypePodSchedulingFailure) &&
173+
len(unscheduledPodNames) == 0 {
174+
if status.Conditions.Update(api.ConditionTypePodSchedulingFailure, false,
175+
"Pods Scheduling Resolved",
176+
"No pod reports a scheduling timeout") {
177+
r.context.CreateEvent(k8sutil.NewPodsSchedulingResolvedEvent(r.context.GetAPIObject()))
178+
}
179+
}
180+
149181
// Save status
150182
if err := r.context.UpdateStatus(status); err != nil {
151183
return maskAny(err)

pkg/util/k8sutil/events.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,25 @@ func NewImmutableFieldEvent(fieldName string, apiObject APIObject) *v1.Event {
7878
return event
7979
}
8080

81+
// NewPodsSchedulingFailureEvent creates an event indicating that one of more cannot be scheduled.
82+
func NewPodsSchedulingFailureEvent(unscheduledPodNames []string, apiObject APIObject) *v1.Event {
83+
event := newDeploymentEvent(apiObject)
84+
event.Type = v1.EventTypeNormal
85+
event.Reason = "Pods Scheduling Failure"
86+
event.Message = fmt.Sprintf("One or more pods are not scheduled in time. Pods: %v", unscheduledPodNames)
87+
return event
88+
}
89+
90+
// NewPodsSchedulingResolvedEvent creates an event indicating that an earlier problem with
91+
// pod scheduling has been resolved.
92+
func NewPodsSchedulingResolvedEvent(apiObject APIObject) *v1.Event {
93+
event := newDeploymentEvent(apiObject)
94+
event.Type = v1.EventTypeNormal
95+
event.Reason = "Pods Scheduling Resolved"
96+
event.Message = "All pods have been scheduled"
97+
return event
98+
}
99+
81100
// NewSecretsChangedEvent creates an event indicating that one or more secrets have changed.
82101
func NewSecretsChangedEvent(changedSecretNames []string, apiObject APIObject) *v1.Event {
83102
event := newDeploymentEvent(apiObject)

pkg/util/k8sutil/pods.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ package k8sutil
2525
import (
2626
"fmt"
2727
"path/filepath"
28+
"time"
2829

2930
"k8s.io/api/core/v1"
3031
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -87,6 +88,21 @@ func IsPodFailed(pod *v1.Pod) bool {
8788
return pod.Status.Phase == v1.PodFailed
8889
}
8990

91+
// IsPodScheduled returns true if the pod has been scheduled.
92+
func IsPodScheduled(pod *v1.Pod) bool {
93+
condition := getPodCondition(&pod.Status, v1.PodScheduled)
94+
return condition != nil && condition.Status == v1.ConditionTrue
95+
}
96+
97+
// IsPodNotScheduledFor returns true if the pod has not been scheduled
98+
// for longer than the given duration.
99+
func IsPodNotScheduledFor(pod *v1.Pod, timeout time.Duration) bool {
100+
condition := getPodCondition(&pod.Status, v1.PodScheduled)
101+
return condition != nil &&
102+
condition.Status == v1.ConditionFalse &&
103+
condition.LastTransitionTime.Time.Add(timeout).Before(time.Now())
104+
}
105+
90106
// IsArangoDBImageIDAndVersionPod returns true if the given pod is used for fetching image ID and ArangoDB version of an image
91107
func IsArangoDBImageIDAndVersionPod(p v1.Pod) bool {
92108
role, found := p.GetLabels()[LabelKeyRole]

0 commit comments

Comments
 (0)