@@ -251,18 +251,16 @@ int mca_coll_han_gather_lg_task(void *task_args)
251251 & rgap );
252252 tmp_buf = (char * ) malloc (rsize );
253253 tmp_rbuf = tmp_buf - rgap ;
254- if (t -> w_rank == t -> root ) {
255- if (MPI_IN_PLACE == t -> sbuf ) {
256- ptrdiff_t rextent ;
257- ompi_datatype_type_extent (dtype , & rextent );
258- ptrdiff_t block_size = rextent * (ptrdiff_t )count ;
259- ptrdiff_t src_shift = block_size * t -> w_rank ;
260- ptrdiff_t dest_shift = block_size * low_rank ;
261- ompi_datatype_copy_content_same_ddt (dtype ,
262- (ptrdiff_t )count ,
263- tmp_rbuf + dest_shift ,
264- (char * )t -> rbuf + src_shift );
265- }
254+ if (t -> w_rank == t -> root && MPI_IN_PLACE == t -> sbuf ) {
255+ ptrdiff_t rextent ;
256+ ompi_datatype_type_extent (dtype , & rextent );
257+ ptrdiff_t block_size = rextent * (ptrdiff_t )count ;
258+ ptrdiff_t src_shift = block_size * t -> w_rank ;
259+ ptrdiff_t dest_shift = block_size * low_rank ;
260+ ompi_datatype_copy_content_same_ddt (dtype ,
261+ (ptrdiff_t )count ,
262+ tmp_rbuf + dest_shift ,
263+ (char * )t -> rbuf + src_shift );
266264 }
267265 }
268266
@@ -405,6 +403,11 @@ mca_coll_han_gather_intra_simple(const void *sbuf, int scount,
405403 char * reorder_buf = NULL ; // allocated memory
406404 char * reorder_buf_start = NULL ; // start of the data
407405 if (w_rank == root ) {
406+ if (MPI_IN_PLACE == sbuf ) {
407+ ptrdiff_t rextent ;
408+ ompi_datatype_type_extent (rdtype , & rextent );
409+ sbuf = rbuf + rextent * (ptrdiff_t )rcount * w_rank ;
410+ }
408411 if (han_module -> is_mapbycore ) {
409412 reorder_buf_start = (char * )rbuf ;
410413 } else {
0 commit comments