@@ -331,6 +331,13 @@ static inline int ompi_osc_rdma_gacc_amo (ompi_osc_rdma_module_t *module, ompi_o
331331 return OMPI_SUCCESS ;
332332}
333333
334+ static inline __opal_attribute_always_inline__ bool ompi_osc_rdma_is_atomic_size_supported (uint64_t remote_addr ,
335+ size_t size )
336+ {
337+ return ((sizeof (uint32_t ) == size && !(remote_addr & 0x3 )) ||
338+ (sizeof (uint64_t ) == size && !(remote_addr & 0x7 )));
339+ }
340+
334341static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t * sync , const void * source , int source_count ,
335342 ompi_datatype_t * source_datatype , void * result , int result_count ,
336343 ompi_datatype_t * result_datatype , opal_convertor_t * result_convertor ,
@@ -339,19 +346,21 @@ static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t *sync, const v
339346 ompi_datatype_t * target_datatype , ompi_op_t * op , ompi_osc_rdma_request_t * request )
340347{
341348 ompi_osc_rdma_module_t * module = sync -> module ;
342- unsigned long len = target_count * target_datatype -> super .size ;
349+ size_t target_dtype_size = target_datatype -> super .size ;
350+ unsigned long len = target_count * target_dtype_size ;
343351 char * ptr = NULL ;
344352 int ret ;
345353
346- request -> len = target_datatype -> super . size * module -> network_amo_max_count ;
354+ request -> len = target_dtype_size * module -> network_amo_max_count ;
347355
348356 OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_TRACE , "initiating accumulate on contiguous region of %lu bytes to remote address %" PRIx64
349357 ", sync %p" , len , target_address , (void * ) sync );
350358
351359 /* if the datatype is small enough (and the count is 1) then try to directly use the hardware to execute
352360 * the atomic operation. this should be safe in all cases as either 1) the user has assured us they will
353361 * never use atomics with count > 1, 2) we have the accumulate lock, or 3) we have an exclusive lock */
354- if ((target_datatype -> super .size <= 8 ) && (((unsigned long ) target_count ) <= module -> network_amo_max_count )) {
362+ if ((target_dtype_size <= 8 ) && (((unsigned long ) target_count ) <= module -> network_amo_max_count ) &&
363+ ompi_osc_rdma_is_atomic_size_supported (target_address , target_dtype_size )) {
355364 ret = ompi_osc_rdma_gacc_amo (module , sync , source , result , result_count , result_datatype , result_convertor ,
356365 peer , target_address , target_handle , target_count , target_datatype , op , request );
357366 if (OPAL_LIKELY (OMPI_SUCCESS == ret )) {
0 commit comments