3535#include <unistd.h>
3636#include <math.h>
3737
38-
3938int mca_common_ompio_file_write (ompio_file_t * fh ,
4039 const void * buf ,
4140 int count ,
@@ -70,16 +69,34 @@ int mca_common_ompio_file_write (ompio_file_t *fh,
7069 return ret ;
7170 }
7271
72+ bool need_to_copy = false;
73+
7374#if OPAL_CUDA_SUPPORT
7475 int is_gpu , is_managed ;
7576 mca_common_ompio_check_gpu_buf ( fh , buf , & is_gpu , & is_managed );
7677 if ( is_gpu && !is_managed ) {
78+ need_to_copy = true;
79+ }
80+ #endif
81+
82+ if ( !( fh -> f_flags & OMPIO_DATAREP_NATIVE ) &&
83+ !(datatype == & ompi_mpi_byte .dt ||
84+ datatype == & ompi_mpi_char .dt )) {
85+ /* only need to copy if either of these conditions holds:
86+ 1. the buffer is an unmanaged CUDA buffer (checked above), or
87+ 2. the data representation is anything other than 'native' and
88+ the datatype is not byte or char (i.e. it does require some actual
89+ work to be done, e.g. for external32).
90+ */
91+ need_to_copy = true;
92+ }
93+
94+ if ( need_to_copy ) {
7795 size_t pos = 0 ;
7896 char * tbuf = NULL ;
7997 opal_convertor_t convertor ;
8098
81- OMPIO_CUDA_PREPARE_BUF (fh ,buf ,count ,datatype ,tbuf ,& convertor ,max_data ,decoded_iov ,iov_count );
82-
99+ OMPIO_PREPARE_BUF (fh ,buf ,count ,datatype ,tbuf ,& convertor ,max_data ,decoded_iov ,iov_count );
83100 opal_convertor_pack (& convertor , decoded_iov , & iov_count , & pos );
84101 opal_convertor_cleanup ( & convertor );
85102 }
@@ -93,16 +110,7 @@ int mca_common_ompio_file_write (ompio_file_t *fh,
93110 & decoded_iov ,
94111 & iov_count );
95112 }
96- #else
97- mca_common_ompio_decode_datatype (fh ,
98- datatype ,
99- count ,
100- buf ,
101- & max_data ,
102- fh -> f_mem_convertor ,
103- & decoded_iov ,
104- & iov_count );
105- #endif
113+
106114 if ( 0 < max_data && 0 == fh -> f_iov_count ) {
107115 if ( MPI_STATUS_IGNORE != status ) {
108116 status -> _ucount = 0 ;
@@ -230,16 +238,34 @@ int mca_common_ompio_file_iwrite (ompio_file_t *fh,
230238 int i = 0 ; /* index into the decoded iovec of the buffer */
231239 int j = 0 ; /* index into the file view iovec */
232240
241+ bool need_to_copy = false;
242+
233243#if OPAL_CUDA_SUPPORT
234244 int is_gpu , is_managed ;
235245 mca_common_ompio_check_gpu_buf ( fh , buf , & is_gpu , & is_managed );
236246 if ( is_gpu && !is_managed ) {
247+ need_to_copy = true;
248+ }
249+ #endif
250+
251+ if ( !( fh -> f_flags & OMPIO_DATAREP_NATIVE ) &&
252+ !(datatype == & ompi_mpi_byte .dt ||
253+ datatype == & ompi_mpi_char .dt )) {
254+ /* only need to copy if either of these conditions holds:
255+ 1. the buffer is an unmanaged CUDA buffer (checked above), or
256+ 2. the data representation is anything other than 'native' and
257+ the datatype is not byte or char (i.e. it does require some actual
258+ work to be done, e.g. for external32).
259+ */
260+ need_to_copy = true;
261+ }
262+
263+ if ( need_to_copy ) {
237264 size_t pos = 0 ;
238265 char * tbuf = NULL ;
239266 opal_convertor_t convertor ;
240-
241- OMPIO_CUDA_PREPARE_BUF (fh ,buf ,count ,datatype ,tbuf ,& convertor ,max_data ,decoded_iov ,iov_count );
242267
268+ OMPIO_PREPARE_BUF (fh ,buf ,count ,datatype ,tbuf ,& convertor ,max_data ,decoded_iov ,iov_count );
243269 opal_convertor_pack (& convertor , decoded_iov , & iov_count , & pos );
244270 opal_convertor_cleanup (& convertor );
245271
@@ -256,16 +282,7 @@ int mca_common_ompio_file_iwrite (ompio_file_t *fh,
256282 & decoded_iov ,
257283 & iov_count );
258284 }
259- #else
260- mca_common_ompio_decode_datatype (fh ,
261- datatype ,
262- count ,
263- buf ,
264- & max_data ,
265- fh -> f_mem_convertor ,
266- & decoded_iov ,
267- & iov_count );
268- #endif
285+
269286 if ( 0 < max_data && 0 == fh -> f_iov_count ) {
270287 ompio_req -> req_ompi .req_status .MPI_ERROR = OMPI_SUCCESS ;
271288 ompio_req -> req_ompi .req_status ._ucount = 0 ;
0 commit comments