1 file changed: +7 -0 lines changed
Original file line number | Diff line number | Diff line change
@@ -397,6 +397,7 @@ def run_command(
397397 shell = True ,
398398 )
399399 # Stream the outputs
400+ logger .debug ("Streaming command output from subprocess %s" , process .pid )
400401 while True :
401402 output = process .stdout .readline ()
402403 if process .poll () is not None and output == b"" :
@@ -411,9 +412,15 @@ def run_command(
411412 # logging will add line break
412413 msg = msg .rstrip ("\n " )
413414 logger .log (level = level , msg = msg )
415+ if "pdsh@" in msg and "ssh exited with exit code 1" in msg :
416+ print ("DeepSpeed Failed." )
417+ sys .exit (1 )
414418 # Add a small delay so that
415419 # outputs from the subsequent code will have different timestamp for oci logging
416420 time .sleep (0.02 )
421+ logger .debug (
422+ "subprocess %s returned exit code %s" , process .pid , process .returncode
423+ )
417424 if check and process .returncode != 0 :
418425 # If there is an error, exit the main process with the same return code.
419426 sys .exit (process .returncode )
You can’t perform that action at this time.
0 commit comments