@@ -112,15 +112,14 @@ static int nbc_allreduce_init(const void* sendbuf, void* recvbuf, int count, MPI
112112 return OMPI_ERR_OUT_OF_RESOURCE ;
113113 }
114114
115+ alg = NBC_ARED_RING ; /* default generic selection */
115116 /* algorithm selection */
116117 int nprocs_pof2 = opal_next_poweroftwo (p ) >> 1 ;
117118 if (libnbc_iallreduce_algorithm == 0 ) {
118119 if (p < 4 || size * count < 65536 || !ompi_op_is_commute (op ) || inplace ) {
119120 alg = NBC_ARED_BINOMIAL ;
120121 } else if (count >= nprocs_pof2 && ompi_op_is_commute (op )) {
121122 alg = NBC_ARED_REDSCAT_ALLGATHER ;
122- } else {
123- alg = NBC_ARED_RING ;
124123 }
125124 } else {
126125 if (libnbc_iallreduce_algorithm == 1 )
@@ -131,8 +130,6 @@ static int nbc_allreduce_init(const void* sendbuf, void* recvbuf, int count, MPI
131130 alg = NBC_ARED_REDSCAT_ALLGATHER ;
132131 else if (libnbc_iallreduce_algorithm == 4 )
133132 alg = NBC_ARED_RDBL ;
134- else
135- alg = NBC_ARED_RING ;
136133 }
137134#ifdef NBC_CACHE_SCHEDULE
138135 /* search schedule in communicator specific tree */
@@ -633,38 +630,37 @@ static inline int allred_sched_recursivedoubling(int rank, int p, const void *se
633630 return OMPI_SUCCESS ;
634631}
635632
636- static inline int allred_sched_ring (int r , int p , int count , MPI_Datatype datatype , const void * sendbuf , void * recvbuf , MPI_Op op ,
637- int size , int ext , NBC_Schedule * schedule , void * tmpbuf ) {
633+ static inline int
634+ allred_sched_ring (int r , int p ,
635+ int count , MPI_Datatype datatype , const void * sendbuf , void * recvbuf ,
636+ MPI_Op op , int size , int ext , NBC_Schedule * schedule , void * tmpbuf )
637+ {
638638 int segsize , * segsizes , * segoffsets ; /* segment sizes and offsets per segment (number of segments == number of nodes */
639- int speer , rpeer ; /* send and recvpeer */
639+ int speer , rpeer ; /* send and recv peers */
640640 int res = OMPI_SUCCESS ;
641641
642- if (count == 0 ) {
642+ if (0 == count ) {
643643 return OMPI_SUCCESS ;
644644 }
645645
646- segsizes = (int * ) malloc (sizeof (int ) * p );
647- segoffsets = (int * ) malloc (sizeof (int ) * p );
648- if (NULL == segsizes || NULL == segoffsets ) {
649- free (segsizes );
650- free (segoffsets );
646+ segsizes = (int * ) malloc ((2 * p + 1 ) * sizeof (int ));
647+ if (NULL == segsizes ) {
651648 return OMPI_ERR_OUT_OF_RESOURCE ;
652649 }
650+ segoffsets = segsizes + p ;
653651
654- segsize = (count + p - 1 ) / p ; /* size of the segments */
652+ segsize = count / p ; /* size of the segments across the last ranks.
653+ The remainder will be evenly distributed across the smaller ranks */
655654
656655 segoffsets [0 ] = 0 ;
657- for (int i = 0 , mycount = count ; i < p ; ++ i ) {
658- mycount -= segsize ;
656+ for (int i = 0 , mycount = count % p ; i < p ; ++ i ) {
659657 segsizes [i ] = segsize ;
660- if ( mycount < 0 ) {
661- segsizes [i ] = segsize + mycount ;
662- mycount = 0 ;
658+ if ( mycount > 0 ) { /* We have extra segments to distribute */
659+ segsizes [i ]++ ;
660+ mycount -- ;
663661 }
664662
665- if (i ) {
666- segoffsets [i ] = segoffsets [i - 1 ] + segsizes [i - 1 ];
667- }
663+ segoffsets [i + 1 ] = segoffsets [i ] + segsizes [i ];
668664 }
669665
670666 /* reduce peers */
@@ -786,28 +782,29 @@ static inline int allred_sched_ring (int r, int p, int count, MPI_Datatype datat
786782 }
787783
788784 if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
789- break ;
785+ goto free_and_return ;
790786 }
791-
792- res = NBC_Sched_recv ((char * ) recvbuf + roffset , false, segsizes [relement ], datatype , rpeer ,
793- schedule , true);
794- if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
795- break ;
787+ if ( recvbuf != sendbuf ) { /* check for MPI_IN_PLACE */
788+ res = NBC_Sched_recv ((char * ) recvbuf + roffset , false, segsizes [relement ], datatype , rpeer ,
789+ schedule , true);
790+ if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
791+ goto free_and_return ;
792+ }
793+ res = NBC_Sched_op ((char * ) sendbuf + roffset , false, (char * ) recvbuf + roffset , false,
794+ segsizes [relement ], datatype , op , schedule , true);
795+ } else {
796+ res = NBC_Sched_recv ((char * ) tmpbuf , false, segsizes [relement ], datatype , rpeer ,
797+ schedule , true);
798+ if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
799+ goto free_and_return ;
800+ }
801+ res = NBC_Sched_op ((char * ) tmpbuf , false, (char * ) recvbuf + roffset , false,
802+ segsizes [relement ], datatype , op , schedule , true);
796803 }
797-
798- res = NBC_Sched_op ((char * ) sendbuf + roffset , false, (char * ) recvbuf + roffset , false,
799- segsizes [relement ], datatype , op , schedule , true);
800804 if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
801- break ;
805+ goto free_and_return ;
802806 }
803807 }
804-
805- if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
806- free (segsizes );
807- free (segoffsets );
808- return res ;
809- }
810-
811808 for (int round = p - 1 ; round < 2 * p - 2 ; ++ round ) {
812809 int selement = (r + 1 - round + 2 * p /*2*p avoids negative mod*/ )%p ; /* the element I am sending */
813810 int soffset = segoffsets [selement ]* ext ;
@@ -819,16 +816,14 @@ static inline int allred_sched_ring (int r, int p, int count, MPI_Datatype datat
819816 if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
820817 break ;
821818 }
822-
823819 res = NBC_Sched_recv ((char * ) recvbuf + roffset , false, segsizes [relement ], datatype , rpeer ,
824820 schedule , true);
825821 if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
826822 break ;
827823 }
828824 }
829-
825+ free_and_return :
830826 free (segsizes );
831- free (segoffsets );
832827
833828 return res ;
834829}
0 commit comments