@@ -189,32 +189,11 @@ mca_coll_han_gather_intra(const void *sbuf, int scount,
189189
190190 ompi_request_wait (& temp_request , MPI_STATUS_IGNORE );
191191
192- /* Suppose, the expected message is 0 1 2 3 4 5 6 7 but the processes are
193- * mapped on 2 nodes, for example |0 2 4 6| |1 3 5 7|. The messages from
194- * low gather will be 0 2 4 6 and 1 3 5 7.
195- * So the upper gather result is 0 2 4 6 1 3 5 7 which must be reordered.
196- * The 3rd element (4) must be recopied at the 4th place. In general, the
197- * i-th element must be recopied at the place given by the i-th entry of the
198- * topology, which is topo[i*topolevel +1]
199- */
200192 /* reorder rbuf based on rank */
201193 if (w_rank == root && !han_module -> is_mapbycore ) {
202- ptrdiff_t rextent ;
203- ompi_datatype_type_extent (rdtype , & rextent );
204- for (int i = 0 ; i < w_size ; i ++ ) {
205- OPAL_OUTPUT_VERBOSE ((30 , mca_coll_han_component .han_output ,
206- "[%d]: Han Gather copy from %d to %d\n" ,
207- w_rank ,
208- i * 2 + 1 ,
209- topo [i * 2 + 1 ]));
210- ptrdiff_t block_size = rextent * (ptrdiff_t )rcount ;
211- ptrdiff_t src_shift = block_size * i ;
212- ptrdiff_t dest_shift = block_size * (ptrdiff_t )topo [i * 2 + 1 ];
213- ompi_datatype_copy_content_same_ddt (rdtype ,
214- (ptrdiff_t )rcount ,
215- reorder_rbuf + src_shift ,
216- (char * )rbuf + dest_shift );
217- }
194+ ompi_coll_han_reorder_gather (reorder_buf ,
195+ rbuf , rcount , rdtype ,
196+ comm , topo );
218197 free (reorder_buf );
219198 }
220199
@@ -227,15 +206,8 @@ int mca_coll_han_gather_lg_task(void *task_args)
227206 mca_coll_han_gather_args_t * t = (mca_coll_han_gather_args_t * ) task_args ;
228207 OPAL_OUTPUT_VERBOSE ((30 , mca_coll_han_component .han_output , "[%d] Han Gather: lg\n" ,
229208 t -> w_rank ));
230- ompi_datatype_t * dtype ;
231- size_t count ;
232- if (t -> w_rank == t -> root ) {
233- dtype = t -> rdtype ;
234- count = t -> rcount ;
235- } else {
236- dtype = t -> sdtype ;
237- count = t -> scount ;
238- }
209+ ompi_datatype_t * dtype = (t -> w_rank == t -> root ) ? t -> rdtype : t -> sdtype ;
210+ size_t count = (t -> w_rank == t -> root ) ? t -> rcount : t -> scount ;
239211
240212 /* If the process is one of the node leader */
241213 char * tmp_buf = NULL ;
@@ -299,15 +271,8 @@ int mca_coll_han_gather_ug_task(void *task_args)
299271 OPAL_OUTPUT_VERBOSE ((30 , mca_coll_han_component .han_output ,
300272 "[%d] Han Gather: ug noop\n" , t -> w_rank ));
301273 } else {
302- ompi_datatype_t * dtype ;
303- size_t count ;
304- if (t -> w_rank == t -> root ) {
305- dtype = t -> rdtype ;
306- count = t -> rcount ;
307- } else {
308- dtype = t -> sdtype ;
309- count = t -> scount ;
310- }
274+ ompi_datatype_t * dtype = (t -> w_rank == t -> root ) ? t -> rdtype : t -> sdtype ;
275+ size_t count = (t -> w_rank == t -> root ) ? t -> rcount : t -> scount ;
311276
312277
313278 int low_size = ompi_comm_size (t -> low_comm );
@@ -375,17 +340,9 @@ mca_coll_han_gather_intra_simple(const void *sbuf, int scount,
375340
376341 ompi_communicator_t * low_comm = han_module -> sub_comm [INTRA_NODE ];
377342 ompi_communicator_t * up_comm = han_module -> sub_comm [INTER_NODE ];
378- ompi_datatype_t * dtype ;
379- size_t count ;
380-
381- if (w_rank == root ) {
382- dtype = rdtype ;
383- count = rcount ;
384- } else {
385- dtype = sdtype ;
386- count = scount ;
387- }
388343
344+ ompi_datatype_t * dtype = (w_rank == root ) ? rdtype : sdtype ;
345+ size_t count = (w_rank == root ) ? rcount : scount ;
389346
390347 /* Get the 'virtual ranks' mapping corresponding to the communicators */
391348 int * vranks = han_module -> cached_vranks ;
@@ -403,10 +360,10 @@ mca_coll_han_gather_intra_simple(const void *sbuf, int scount,
403360 char * reorder_buf = NULL ; // allocated memory
404361 char * reorder_buf_start = NULL ; // start of the data
405362 if (w_rank == root ) {
406- if (MPI_IN_PLACE == sbuf ) {
363+ if (MPI_IN_PLACE == sbuf ) {
407364 ptrdiff_t rextent ;
408365 ompi_datatype_type_extent (rdtype , & rextent );
409- sbuf = rbuf + rextent * (ptrdiff_t )rcount * w_rank ;
366+ sbuf = ( char * ) rbuf + rextent * (ptrdiff_t )rcount * w_rank ;
410367 }
411368 if (han_module -> is_mapbycore ) {
412369 reorder_buf_start = (char * )rbuf ;
@@ -507,13 +464,11 @@ ompi_coll_han_reorder_gather(const void *sbuf,
507464 int w_size = ompi_comm_size (comm );
508465 ptrdiff_t rextent ;
509466 ompi_datatype_type_extent (dtype , & rextent );
467+ const ptrdiff_t block_size = rextent * (ptrdiff_t )count ;
510468 for ( i = 0 ; i < w_size ; i ++ ) {
511469 OPAL_OUTPUT_VERBOSE ((30 , mca_coll_han_component .han_output ,
512- "[%d]: Future reorder from %d to %d\n" ,
513- w_rank ,
514- i * topolevel + 1 ,
515- topo [i * topolevel + 1 ]));
516- ptrdiff_t block_size = rextent * (ptrdiff_t )count ;
470+ "[%d]: HAN Gather reorder from %d to %d\n" ,
471+ w_rank , i , topo [i * topolevel + 1 ]));
517472 ptrdiff_t src_shift = block_size * i ;
518473 ptrdiff_t dest_shift = block_size * (ptrdiff_t )topo [i * topolevel + 1 ];
519474 ompi_datatype_copy_content_same_ddt (dtype ,
0 commit comments