@@ -216,6 +216,66 @@ func GetClusterSecret(ctx context.Context, kubeFactory kube.Factory, namespace s
216216 return res , nil
217217}
218218
219+ func WaitForJob (ctx context.Context , f kube.Factory , ns , jobName string ) error {
220+ var attempt int32
221+ return f .Wait (ctx , & kube.WaitOptions {
222+ Interval : time .Second * 10 ,
223+ Timeout : time .Minute * 11 , // BackOffLimit of 6 is a total of 630s, or 10m30s
224+ Resources : []kube.Resource {
225+ {
226+ Name : jobName ,
227+ Namespace : ns ,
228+ WaitFunc : func (ctx context.Context , f kube.Factory , ns , name string ) (bool , error ) {
229+ cs , err := f .KubernetesClientSet ()
230+ if err != nil {
231+ return false , err
232+ }
233+
234+ j , err := cs .BatchV1 ().Jobs (ns ).Get (ctx , name , metav1.GetOptions {})
235+ if err != nil {
236+ return false , err
237+ }
238+
239+ if j .Status .Failed > attempt {
240+ attempt = j .Status .Failed
241+ log .G (ctx ).Warnf ("Attempt #%d/%d failed:" , attempt , * j .Spec .BackoffLimit )
242+ printJobLogs (ctx , cs , j )
243+ } else if j .Status .Succeeded == 1 {
244+ attempt += 1
245+ log .G (ctx ).Infof ("Attempt #%d/%d succeeded:" , attempt , * j .Spec .BackoffLimit )
246+ printJobLogs (ctx , cs , j )
247+ }
248+
249+ for _ , cond := range j .Status .Conditions {
250+ if cond .Type == batchv1 .JobFailed {
251+ err = fmt .Errorf ("add-cluster-job failed after %d attempts" , j .Status .Failed )
252+ break
253+ }
254+ }
255+
256+ return j .Status .Succeeded == 1 || j .Status .Failed == * j .Spec .BackoffLimit , err
257+ },
258+ },
259+ },
260+ })
261+ }
262+
263+ func printJobLogs (ctx context.Context , client kubernetes.Interface , job * batchv1.Job ) {
264+ p , err := getPodByJob (ctx , client , job )
265+ if err != nil {
266+ log .G (ctx ).Errorf ("Failed getting pod for job: $s" , err .Error ())
267+ return
268+ }
269+
270+ logs , err := getPodLogs (ctx , client , p .GetNamespace (), p .GetName ())
271+ if err != nil {
272+ log .G (ctx ).Errorf ("Failed getting logs for pod: $s" , err .Error ())
273+ return
274+ }
275+
276+ fmt .Printf ("=====\n %s\n =====\n \n " , logs )
277+ }
278+
219279func runNetworkTest (ctx context.Context , kubeFactory kube.Factory , urls ... string ) error {
220280 const networkTestsTimeout = 120 * time .Second
221281
@@ -426,8 +486,8 @@ func deleteJob(ctx context.Context, client kubernetes.Interface, job *batchv1.Jo
426486}
427487
428488func getPodByJob (ctx context.Context , client kubernetes.Interface , job * batchv1.Job ) (* v1.Pod , error ) {
429- pods , err := client .CoreV1 ().Pods (store . Get (). DefaultNamespace ).List (ctx , metav1.ListOptions {
430- LabelSelector : "controller-uid=" + job .GetLabels () ["controller-uid" ],
489+ pods , err := client .CoreV1 ().Pods (job . GetNamespace () ).List (ctx , metav1.ListOptions {
490+ LabelSelector : "controller-uid=" + job .Spec . Selector . MatchLabels ["controller-uid" ],
431491 })
432492 if err != nil {
433493 return nil , err
0 commit comments