@@ -209,9 +209,14 @@ public async Task<int> DockerStart(IExecutionContext context, string containerId
209209 ArgUtil . NotNull ( context , nameof ( context ) ) ;
210210 ArgUtil . NotNull ( containerId , nameof ( containerId ) ) ;
211211
212- var action = new Func < Task < int > > ( async ( ) => await ExecuteDockerCommandAsync ( context , "start" , containerId , context . CancellationToken ) ) ;
213- const string command = "Docker start" ;
214- return await ExecuteDockerCommandAsyncWithRetries ( context , action , command ) ;
212+ if ( ! AgentKnobs . CheckBeforeRetryDockerStart . GetValue ( context ) . AsBoolean ( ) )
213+ {
214+ var action = new Func < Task < int > > ( async ( ) => await ExecuteDockerCommandAsync ( context , "start" , containerId , context . CancellationToken ) ) ;
215+ const string command = "Docker start" ;
216+ return await ExecuteDockerCommandAsyncWithRetries ( context , action , command ) ;
217+ }
218+ // Use the new helper for start with retries and running-state checks
219+ return await ExecuteDockerStartWithRetriesAndCheck ( context , containerId ) ;
215220 }
216221
217222 public async Task < int > DockerRemove ( IExecutionContext context , string containerId )
@@ -533,5 +538,60 @@ private static async Task<List<string>> ExecuteDockerCommandAsyncWithRetries(IEx
533538
534539 return output ;
535540 }
541+
/// <summary>
/// Executes 'docker start' for a container, making up to three attempts with a 10-second
/// back-off between them, and checks whether the container is already running before each
/// attempt. A failed attempt is retried only when the DockerActionRetries knob is enabled.
/// </summary>
/// <param name="context">Execution context used for knob lookup, logging and cancellation.</param>
/// <param name="containerId">Identifier of the container to start.</param>
/// <returns>
/// 0 if 'docker start' succeeded or the container was found running; otherwise the exit code
/// of the last 'docker start' attempt.
/// </returns>
private async Task<int> ExecuteDockerStartWithRetriesAndCheck(IExecutionContext context, string containerId)
{
    bool dockerActionRetries = AgentKnobs.DockerActionRetries.GetValue(context).AsBoolean();
    context.Output($"DockerActionRetries variable value: {dockerActionRetries}");

    const int maxRetries = 3;
    TimeSpan retryDelay = TimeSpan.FromSeconds(10);
    int retryCount = 0;
    int exitCode = 0;

    // Remembers a positive running-state check made inside the loop so the
    // post-loop reconciliation does not have to query the container again.
    bool containerRunning = false;

    while (retryCount < maxRetries)
    {
        // A previous attempt may have reported failure even though the container came up,
        // so check the actual state before issuing another 'docker start'.
        if (await IsContainerRunning(context, containerId))
        {
            context.Output($"Container {containerId} is running before attempt {retryCount + 1}.");
            containerRunning = true;
            break;
        }

        exitCode = await ExecuteDockerCommandAsync(context, "start", containerId, context.CancellationToken);
        if (exitCode == 0 || !dockerActionRetries)
        {
            // Either the start succeeded or retries are disabled by the knob.
            break;
        }

        // Report the delay as a number of seconds; interpolating the TimeSpan itself
        // would print "00:00:10 seconds".
        context.Warning($"Docker start failed with exit code {exitCode}, back off {retryDelay.TotalSeconds} seconds before retry.");
        retryCount++;

        // Pass the job's cancellation token so a cancelled job does not sit out the back-off.
        // NOTE(review): cancellation now surfaces here as an OperationCanceledException;
        // presumably ExecuteDockerCommandAsync behaves the same way for the same token - confirm.
        await Task.Delay(retryDelay, context.CancellationToken);
    }

    // Handle the case where the last attempt returned a non-zero exit code but the
    // container is nevertheless running: treat that as success.
    if (exitCode != 0 && (containerRunning || await IsContainerRunning(context, containerId)))
    {
        context.Output($"Container {containerId} is already running after {retryCount} retries. but exit code was {exitCode}.");
        exitCode = 0; // Indicate success
    }

    if (exitCode != 0)
    {
        // The container is still not running after all attempts.
        context.Warning($"Container {containerId} is not running after {retryCount} retries. Last exit code: {exitCode}");
    }
    else
    {
        context.Output($"Container {containerId} started successfully after {retryCount} retries.");
    }

    context.Debug($"Docker start completed with exit code {exitCode}.");
    return exitCode;
}
536596 }
537597}
0 commit comments