@@ -416,6 +416,7 @@ struct ompi_comm_ishrink_context_t {
416416 ompi_group_t * failed_group ;
417417 ompi_group_t * alive_group ;
418418 ompi_group_t * alive_rgroup ;
419+ int flag ;
419420 double start ;
420421};
421422typedef struct ompi_comm_ishrink_context_t ompi_comm_ishrink_context_t ;
@@ -429,7 +430,6 @@ static int ompi_comm_ishrink_check_activate(ompi_comm_request_t *request);
429430int ompi_comm_ishrink_internal (ompi_communicator_t * comm , ompi_communicator_t * * newcomm , ompi_request_t * * req )
430431{
431432 int rc ;
432- int flag = 1 ;
433433#if OPAL_ENABLE_DEBUG
434434 double stop ;
435435#endif
@@ -479,7 +479,8 @@ int ompi_comm_ishrink_internal(ompi_communicator_t* comm, ompi_communicator_t**
479479 * the value of flag, instead we are only using the globally consistent
480480 * return value.
481481 */
482- rc = comm -> c_coll -> coll_iagree ( & flag ,
482+ context -> flag = 1 ;
483+ rc = comm -> c_coll -> coll_iagree ( & context -> flag ,
483484 1 ,
484485 & ompi_mpi_int .dt ,
485486 & ompi_mpi_op_band .op ,
@@ -508,7 +509,7 @@ static int ompi_comm_ishrink_check_agree(ompi_comm_request_t *request) {
508509 ompi_communicator_t * comm = context -> comm ;
509510 ompi_request_t * subreq [1 ];
510511 ompi_group_t * comm_group = NULL ;
511- int rc , flag = 1 ;
512+ int rc ;
512513#if OPAL_ENABLE_DEBUG
513514 double stop ;
514515#endif
@@ -522,13 +523,17 @@ static int ompi_comm_ishrink_check_agree(ompi_comm_request_t *request) {
522523 rc = request -> super .req_status .MPI_ERROR ;
523524 if ( (OMPI_SUCCESS != rc ) && (MPI_ERR_PROC_FAILED != rc ) ) {
524525 opal_output (0 , "%s:%d Agreement failure: %d\n" , __FILE__ , __LINE__ , rc );
526+ ompi_comm_request_return (request );
527+ OBJ_RELEASE (context -> failed_group );
525528 return rc ;
526529 }
527530
528531 if ( MPI_ERR_PROC_FAILED == rc ) {
529532 /* previous round found more failures, redo */
533+ OBJ_RELEASE (context -> failed_group );
530534 request -> super .req_status .MPI_ERROR = MPI_SUCCESS ;
531- rc = comm -> c_coll -> coll_iagree ( & flag ,
535+ context -> flag = 1 ;
536+ rc = comm -> c_coll -> coll_iagree ( & context -> flag ,
532537 1 ,
533538 & ompi_mpi_int .dt ,
534539 & ompi_mpi_op_band .op ,
@@ -575,7 +580,6 @@ static int ompi_comm_ishrink_check_agree(ompi_comm_request_t *request) {
575580 }
576581 }
577582 OBJ_RELEASE (context -> failed_group );
578- context -> failed_group = NULL ;
579583
580584 rc = ompi_comm_set_nb ( context -> newcomm , /* new comm */
581585 comm , /* old comm */
@@ -614,15 +618,16 @@ static int ompi_comm_ishrink_check_setrank(ompi_comm_request_t *request) {
614618
615619 /* cleanup temporary groups */
616620 OBJ_RELEASE (context -> alive_group );
617- context -> alive_group = NULL ;
618621 if ( NULL != context -> alive_rgroup ) {
619622 OBJ_RELEASE (context -> alive_rgroup );
620623 }
621- context -> alive_rgroup = NULL ;
622624
623625 /* check errors in prior step */
624- if ( NULL == * context -> newcomm ) {
625- rc = MPI_ERR_INTERN ;
626+ rc = request -> super .req_status .MPI_ERROR ;
627+ if ( OMPI_SUCCESS != rc ) {
628+ opal_output_verbose (1 , ompi_ftmpi_output_handle ,
629+ "%s ompi: comm_ishrink: Construction failed with error %d" ,
630+ OMPI_NAME_PRINT (OMPI_PROC_MY_NAME ), rc );
626631 ompi_comm_request_return (request );
627632 OBJ_RELEASE (* context -> newcomm );
628633 return rc ;
@@ -719,6 +724,7 @@ static int ompi_comm_ishrink_check_cid(ompi_comm_request_t *request) {
719724 mode ,
720725 subreq );
721726 if ( OMPI_SUCCESS != rc ) {
727+ ompi_comm_request_return (request );
722728 OBJ_RELEASE (* context -> newcomm );
723729 return rc ;
724730 }
@@ -729,18 +735,23 @@ static int ompi_comm_ishrink_check_cid(ompi_comm_request_t *request) {
729735}
730736
731737static int ompi_comm_ishrink_check_activate (ompi_comm_request_t * request ) {
738+ ompi_comm_ishrink_context_t * context =
739+ (ompi_comm_ishrink_context_t * )request -> context ;
732740 int rc ;
733741#if OPAL_ENABLE_DEBUG
734742 double stop ;
735743#endif
736744
737745 rc = request -> super .req_status .MPI_ERROR ;
738746 if ( OMPI_SUCCESS != rc ) {
747+ opal_output_verbose (1 , ompi_ftmpi_output_handle ,
748+ "%s ompi: comm_ishrink: Activation failed with error %d" ,
749+ OMPI_NAME_PRINT (OMPI_PROC_MY_NAME ), rc );
750+ ompi_comm_request_return (request );
751+ OBJ_RELEASE (* context -> newcomm );
739752 return rc ;
740753 }
741754#if OPAL_ENABLE_DEBUG
742- ompi_comm_ishrink_context_t * context =
743- (ompi_comm_ishrink_context_t * )request -> context ;
744755 stop = MPI_Wtime ();
745756 OPAL_OUTPUT_VERBOSE ((10 , ompi_ftmpi_output_handle ,
746757 "%s ompi: comm_ishrink: COLL SELECT: %g seconds\n" ,
0 commit comments