@@ -401,8 +401,7 @@ spec:
401401 rules:
402402 - action: Ignore
403403 onPodConditions:
404- operator: In
405- values: [ DisruptionTarget ]
404+ - type: DisruptionTarget
406405` ` `
407406
408407Note that, in this case the user supplies a list of Pod condition type values.
@@ -752,15 +751,16 @@ type PodFailurePolicyAction string
752751const (
753752 // This is an action which might be taken on a pod failure - mark the
754753 // pod's job as Failed and terminate all running pods.
755- PodFailurePolicyActionTerminate PodFailurePolicyAction = "Terminate "
754+ PodFailurePolicyActionFailJob PodFailurePolicyAction = "FailJob"
756755
757756 // This is an action which might be taken on a pod failure - the counter towards
758- // .backoffLimit is not incremented and a replacement pod is created.
757+ // .backoffLimit, represented by the job's .status.failed field, is not
758+ // incremented and a replacement pod is created.
759759 PodFailurePolicyActionIgnore PodFailurePolicyAction = "Ignore"
760760
761761 // This is an action which might be taken on a pod failure - the pod failure
762- // is handled in the default way - the counter towards .backoffLimit is
763- // incremented.
762+ // is handled in the default way - the counter towards .backoffLimit,
763+ // represented by the job's .status.failed field, is incremented.
764764 PodFailurePolicyActionCount PodFailurePolicyAction = "Count"
765765)
766766
@@ -772,15 +772,20 @@ const (
772772)
773773
774774// PodFailurePolicyOnExitCodesRequirement describes the requirement for handling
775- // a failed pod based on its container exit codes.
775+ // a failed pod based on its container exit codes. In particular, it looks up the
776+ // .state.terminated.exitCode for each app container and init container status,
777+ // represented by the .status.containerStatuses and .status.initContainerStatuses
778+ // fields in the Pod status, respectively. Containers completed with success
779+ // (exit code 0) are excluded from the requirement check.
776780type PodFailurePolicyOnExitCodesRequirement struct {
777781 // Restricts the check for exit codes to the container with the
778782 // specified name. When null, the rule applies to all containers.
779783 // +optional
780784 ContainerName *string
781785
782786 // Represents the relationship between the container exit code(s) and the
783- // specified values. Possible values are:
787+ // specified values. Containers completed with success (exit code 0) are
788+ // excluded from the requirement check. Possible values are:
784789 // - In: the requirement is satisfied if at least one container exit code
785790 // (might be multiple if there are multiple containers not restricted
786791 // by the 'containerName' field) is in the set of specified values.
@@ -791,39 +796,26 @@ type PodFailurePolicyOnExitCodesRequirement struct {
791796
792797 // Specifies the set of values. Each returned container exit code (might be
793798 // multiple in case of multiple containers) is checked against this set of
794- // values with respect to the operator.
799+ // values with respect to the operator. Value '0' cannot be used for the In
800+ // operator.
795801 // +listType=set
796802 Values []int32
797803}
798804
799- type PodFailurePolicyOnPodConditionsOperator string
800-
801- const (
802- PodFailurePolicyOnPodConditionsOpIn PodFailurePolicyOnPodConditionsOperator = "In"
803- )
804-
805- // PodFailurePolicyOnPodConditionsRequirement describes the requirement for handling
806- // a failed pod based on its conditions.
807- type PodFailurePolicyOnPodConditionsRequirement struct {
808- // Represents the relationship between the set of actual Pod condition types
809- // and the set of specified Pod condition types. Possible values are:
810- // - In: the requirement is satisfied, if at least one actual Pod condition
811- // type (for a condition with status=True) is present in the set of
812- // specified Pod condition types.
813- Operator PodFailurePolicyOnPodConditionsOperator
814-
815- // Specifies the set of values. Each actual pod condition type,
816- // with status=True, is checked against this set with respect to the operator.
817- // +listType=set
818- Values []api.PodConditionType
805+ // PodFailurePolicyOnPodConditionsPattern describes a pattern for matching
806+ // an actual pod condition type.
807+ type PodFailurePolicyOnPodConditionsPattern struct {
808+ // Specifies the required Pod condition type. The pattern matches a pod condition
809+ // if the specified type equals the pod condition type.
810+ Type api.PodConditionType
819811}
820812
821813// PodFailurePolicyRule describes how a pod failure is handled when the requirements are met.
822814// Only one of OnExitCodes and onPodConditions can be used in each rule.
823815type PodFailurePolicyRule struct {
824816 // Specifies the action taken on a pod failure when the requirements are satisfied.
825817 // Possible values are:
826- // - Terminate : indicates that the pod's job is marked as Failed and all
818+ // - FailJob: indicates that the pod's job is marked as Failed and all
827819 // running pods are terminated.
828820 // - Ignore: indicates that the counter towards the .backoffLimit is not
829821 // incremented and a replacement pod is created.
@@ -835,9 +827,11 @@ type PodFailurePolicyRule struct {
835827 // +optional
836828 OnExitCodes *PodFailurePolicyOnExitCodesRequirement
837829
838- // Represents the requirement on the pod conditions.
839- // +optional
840- OnPodConditions *PodFailurePolicyOnPodConditionsRequirement
830+ // Represents the requirement on the pod conditions. The requirement is represented
831+ // as a list of pod condition patterns. The requirement is satisfied if at
832+ // least one pattern matches an actual pod condition.
833+ // +listType=atomic
834+ OnPodConditions []PodFailurePolicyOnPodConditionsPattern
841835}
842836
843837// PodFailurePolicy describes how failed pods influence the backoffLimit.
@@ -857,8 +851,13 @@ type JobSpec struct {
857851 // Specifies the policy of handling failed pods. In particular, it allows to
858852 // specify the set of actions and conditions which need to be
859853 // satisfied to take the associated action.
860- // If empty, the default behaviour applies - the counter of pod failed is
861- // incremented and it is checked against the backoffLimit.
854+ // If empty, the default behaviour applies - the counter of failed pods,
855+ // represented by the job's .status.failed field, is incremented and it is
856+ // checked against the backoffLimit. This field cannot be used in combination
857+ // with restartPolicy=OnFailure.
858+ //
859+ // This field is alpha-level. To use this field, you must enable the
860+ // `JobPodFailurePolicy` feature gate (disabled by default).
862861 // +optional
863862 PodFailurePolicy *PodFailurePolicy
864863 ...
@@ -899,8 +898,7 @@ spec:
899898 values : [1,2,3]
900899 - action : Ignore
901900 onPodConditions :
902- operator: In
903- values: [ DisruptionTarget ]
901+ - type : DisruptionTarget
904902` ` `
905903
906904### Evaluation
@@ -1086,6 +1084,9 @@ Below are some examples to consider, in addition to the aforementioned [maturity
10861084 indicating that a pod should be retried (see : [Evolving condition types](#evolving-condition-types))
10871085- Simplify the code in job controller responsible for detection of failed pods
10881086 based on the fix for pods stuck in the running phase (see: [Marking pods as Failed](#marking-pods-as-failed))
1087+ - Commonize the code for appending pod conditions between components
1088+ - Do not update the pod disruption condition (with type=`DisruptionTarget`) if
1089+ it is already present with `status=True`
10891090- Review and implement if feasible adding of pod conditions with the use of
10901091 [SSA](https://kubernetes.io/docs/reference/using-api/server-side-apply/) client.
10911092- The feature flag enabled by default
0 commit comments