@@ -88,17 +88,33 @@ int mca_common_ompio_file_read (ompio_file_t *fh,
8888 return ret ;
8989 }
9090
91-
91+ bool need_to_copy = false;
92+ opal_convertor_t convertor ;
9293#if OPAL_CUDA_SUPPORT
9394 int is_gpu , is_managed ;
94- opal_convertor_t convertor ;
9595 mca_common_ompio_check_gpu_buf ( fh , buf , & is_gpu , & is_managed );
9696 if ( is_gpu && !is_managed ) {
97+ need_to_copy = true;
98+ }
99+ #endif
100+
101+ if ( !( fh -> f_flags & OMPIO_DATAREP_NATIVE ) &&
102+ !(datatype == & ompi_mpi_byte .dt ||
103+ datatype == & ompi_mpi_char .dt )) {
104+ /* only need to copy if either of these conditions holds:
105+ 1. the buffer is an unmanaged CUDA buffer (checked above), or
106+ 2. the data representation is anything other than 'native' AND
107+ the datatype is not byte or char (i.e., it does require some
108+ actual work to be done, e.g. for external32).
109+ */
110+ need_to_copy = true;
111+ }
112+
113+ if ( need_to_copy ) {
97114 char * tbuf = NULL ;
98115
99- OMPIO_CUDA_PREPARE_BUF (fh ,buf ,count ,datatype ,tbuf ,& convertor ,max_data ,decoded_iov ,iov_count );
100-
101- }
116+ OMPIO_PREPARE_READ_BUF (fh ,buf ,count ,datatype ,tbuf ,& convertor ,max_data ,decoded_iov ,iov_count );
117+ }
102118 else {
103119 mca_common_ompio_decode_datatype (fh ,
104120 datatype ,
@@ -109,16 +125,7 @@ int mca_common_ompio_file_read (ompio_file_t *fh,
109125 & decoded_iov ,
110126 & iov_count );
111127 }
112- #else
113- mca_common_ompio_decode_datatype (fh ,
114- datatype ,
115- count ,
116- buf ,
117- & max_data ,
118- fh -> f_mem_convertor ,
119- & decoded_iov ,
120- & iov_count );
121- #endif
128+
122129 if ( 0 < max_data && 0 == fh -> f_iov_count ) {
123130 if ( MPI_STATUS_IGNORE != status ) {
124131 status -> _ucount = 0 ;
@@ -170,15 +177,14 @@ int mca_common_ompio_file_read (ompio_file_t *fh,
170177 }
171178 }
172179
173- #if OPAL_CUDA_SUPPORT
174- if ( is_gpu && !is_managed ) {
180+ if ( need_to_copy ) {
175181 size_t pos = 0 ;
176182
177183 opal_convertor_unpack (& convertor , decoded_iov , & iov_count , & pos );
178184 opal_convertor_cleanup (& convertor );
179185 mca_common_ompio_release_buf (fh , decoded_iov -> iov_base );
180186 }
181- #endif
187+
182188 if (NULL != decoded_iov ) {
183189 free (decoded_iov );
184190 decoded_iov = NULL ;
@@ -257,13 +263,32 @@ int mca_common_ompio_file_iread (ompio_file_t *fh,
257263 int i = 0 ; /* index into the decoded iovec of the buffer */
258264 int j = 0 ; /* index into the file view iovec */
259265
266+ bool need_to_copy = false;
267+
260268#if OPAL_CUDA_SUPPORT
261269 int is_gpu , is_managed ;
262270 mca_common_ompio_check_gpu_buf ( fh , buf , & is_gpu , & is_managed );
263271 if ( is_gpu && !is_managed ) {
272+ need_to_copy = true;
273+ }
274+ #endif
275+
276+ if ( !( fh -> f_flags & OMPIO_DATAREP_NATIVE ) &&
277+ !(datatype == & ompi_mpi_byte .dt ||
278+ datatype == & ompi_mpi_char .dt )) {
279+ /* only need to copy if either of these conditions holds:
280+ 1. the buffer is an unmanaged CUDA buffer (checked above), or
281+ 2. the data representation is anything other than 'native' AND
282+ the datatype is not byte or char (i.e., it does require some
283+ actual work to be done, e.g. for external32).
284+ */
285+ need_to_copy = true;
286+ }
287+
288+ if ( need_to_copy ) {
264289 char * tbuf = NULL ;
265290
266- OMPIO_CUDA_PREPARE_BUF (fh ,buf ,count ,datatype ,tbuf ,& ompio_req -> req_convertor ,max_data ,decoded_iov ,iov_count );
291+ OMPIO_PREPARE_READ_BUF (fh ,buf ,count ,datatype ,tbuf ,& ompio_req -> req_convertor ,max_data ,decoded_iov ,iov_count );
267292
268293 ompio_req -> req_tbuf = tbuf ;
269294 ompio_req -> req_size = max_data ;
@@ -278,16 +303,7 @@ int mca_common_ompio_file_iread (ompio_file_t *fh,
278303 & decoded_iov ,
279304 & iov_count );
280305 }
281- #else
282- mca_common_ompio_decode_datatype (fh ,
283- datatype ,
284- count ,
285- buf ,
286- & max_data ,
287- fh -> f_mem_convertor ,
288- & decoded_iov ,
289- & iov_count );
290- #endif
306+
291307 if ( 0 < max_data && 0 == fh -> f_iov_count ) {
292308 ompio_req -> req_ompi .req_status .MPI_ERROR = OMPI_SUCCESS ;
293309 ompio_req -> req_ompi .req_status ._ucount = 0 ;
0 commit comments