@@ -153,7 +153,7 @@ int32_t opal_unpack_homogeneous_contig_function(opal_convertor_t *pConv, struct
153153 }
154154 }
155155 }
156- * out_size = iov_idx ; /* we only reach this line after the for loop succesfully complete */
156+ * out_size = iov_idx ; /* we only reach this line after the for loop successfully complete */
157157 * max_data = pConv -> bConverted - initial_bytes_converted ;
158158 if (pConv -> bConverted == pConv -> local_size ) {
159159 pConv -> flags |= CONVERTOR_COMPLETED ;
@@ -173,63 +173,71 @@ int32_t opal_unpack_homogeneous_contig_function(opal_convertor_t *pConv, struct
173173 * change the content of the data (as in all conversions that require changing the size
174174 * of the exponent or mantissa).
175175 */
176- static inline void opal_unpack_partial_datatype ( opal_convertor_t * pConvertor , dt_elem_desc_t * pElem ,
177- unsigned char * partial_data ,
178- ptrdiff_t start_position , size_t length ,
179- unsigned char * * user_buffer )
176+ static inline void
177+ opal_unpack_partial_predefined ( opal_convertor_t * pConvertor , const dt_elem_desc_t * pElem ,
178+ size_t * COUNT , unsigned char * * packed ,
179+ unsigned char * * memory , size_t * SPACE )
180180{
181181 char unused_byte = 0x7F , saved_data [16 ];
182182 unsigned char temporary [16 ], * temporary_buffer = temporary ;
183- unsigned char * user_data = * user_buffer + pElem -> elem .disp ;
184- size_t count_desc = 1 ;
183+ unsigned char * user_data = * memory + pElem -> elem .disp ;
185184 size_t data_length = opal_datatype_basicDatatypes [pElem -> elem .common .type ]-> size ;
185+ unsigned char * partial_data = * packed ;
186+ ptrdiff_t start_position = pConvertor -> partial_length ;
187+ size_t length = data_length - start_position ;
188+ size_t count_desc = 1 ;
189+ dt_elem_desc_t single_elem = { .elem = { .common = pElem -> elem .common , .count = 1 , .blocklen = 1 ,
190+ .extent = data_length , /* advance by a full data element */
191+ .disp = 0 /* right where the pointer is */ } };
192+ if ( * SPACE < length ) {
193+ length = * SPACE ;
194+ }
186195
187- DO_DEBUG (opal_output (0 ,
188- "unpack partial data start %lu end %lu data_length %lu user %p\n"
189- "\tbConverted %lu total_length %lu count %ld\n" ,
190- (unsigned long ) start_position , (unsigned long ) start_position + length ,
191- (unsigned long ) data_length , (void * ) * user_buffer ,
192- (unsigned long ) pConvertor -> bConverted ,
193- (unsigned long ) pConvertor -> local_size , pConvertor -> count ););
194-
195- /* Find a byte that is not used in the partial buffer */
196- find_unused_byte :
197- for (size_t i = 0 ; i < length ; i ++ ) {
198- if (unused_byte == partial_data [i ]) {
196+ DO_DEBUG ( opal_output ( 0 , "unpack partial data start %lu end %lu data_length %lu user %p\n"
197+ "\tbConverted %lu total_length %lu count %ld\n" ,
198+ (unsigned long )start_position , (unsigned long )start_position + length ,
199+ (unsigned long )data_length , (void * )* memory ,
200+ (unsigned long )pConvertor -> bConverted ,
201+ (unsigned long )pConvertor -> local_size , pConvertor -> count ); );
202+ COMPUTE_CSUM ( partial_data , length , pConvertor );
203+
204+ /* Find a byte value that is not used in the partial buffer. We use it as a marker
205+ * to identify what has not been modified by the unpack call. */
206+ find_unused_byte :
207+ for (size_t i = 0 ; i < length ; i ++ ) {
208+ if ( unused_byte == partial_data [i ] ) {
199209 unused_byte -- ;
200210 goto find_unused_byte ;
201211 }
202212 }
203213
204- /* Copy and fill the rest of the buffer with the unused byte */
205- memset (temporary , unused_byte , data_length );
206- MEMCPY (temporary + start_position , partial_data , length );
214+ /* Prepare an full element of the predefined type, by populating an entire type
215+ * with the unused byte and then put the partial data at the right position. */
216+ memset ( temporary , unused_byte , data_length );
217+ MEMCPY ( temporary + start_position , partial_data , length );
207218
219+ /* Save the original content of the user memory */
208220#if OPAL_CUDA_SUPPORT
209221 /* In the case where the data is being unpacked from device memory, need to
210- * use the special host to device memory copy. Note this code path was only
211- * seen on large receives of noncontiguous data via buffered sends. */
212- pConvertor -> cbmemcpy (saved_data , user_data , data_length , pConvertor );
222+ * use the special host to device memory copy. */
223+ pConvertor -> cbmemcpy (saved_data , user_data , data_length , pConvertor );
213224#else
214- /* Save the content of the user memory */
215- MEMCPY (saved_data , user_data , data_length );
225+ MEMCPY ( saved_data , user_data , data_length );
216226#endif
217227
218228 /* Then unpack the data into the user memory */
219- UNPACK_PREDEFINED_DATATYPE (pConvertor , pElem , count_desc , temporary_buffer , * user_buffer ,
229+ UNPACK_PREDEFINED_DATATYPE (pConvertor , & single_elem , count_desc , temporary_buffer , user_data ,
220230 data_length );
221231
222- /* reload the length as it is reset by the macro */
232+ /* reload the length and user buffer as they have been updated by the macro */
223233 data_length = opal_datatype_basicDatatypes [pElem -> elem .common .type ]-> size ;
234+ user_data = * memory + pElem -> elem .disp ;
224235
225- /* For every occurrence of the unused byte move data from the saved
226- * buffer back into the user memory.
227- */
236+ /* Rebuild the data by pulling back the unmodified bytes from the original
237+ * content in the user memory. */
228238#if OPAL_CUDA_SUPPORT
229239 /* Need to copy the modified user_data again so we can see which
230- * bytes need to be converted back to their original values. Note
231- * this code path was only seen on large receives of noncontiguous
232- * data via buffered sends. */
240+ * bytes need to be converted back to their original values. */
233241 {
234242 char resaved_data [16 ];
235243 pConvertor -> cbmemcpy (resaved_data , user_data , data_length , pConvertor );
@@ -245,6 +253,16 @@ static inline void opal_unpack_partial_datatype(opal_convertor_t *pConvertor, dt
245253 }
246254 }
247255#endif
256+ pConvertor -> partial_length = (pConvertor -> partial_length + length ) % data_length ;
257+ * SPACE -= length ;
258+ * packed += length ;
259+ if (0 == pConvertor -> partial_length ) {
260+ (* COUNT )-- ; /* we have enough to complete one full predefined type */
261+ * memory += data_length ;
262+ if (0 == (* COUNT % pElem -> elem .blocklen )) {
263+ * memory += pElem -> elem .extent - (pElem -> elem .blocklen * data_length );
264+ }
265+ }
248266}
249267
250268/* The pack/unpack functions need a cleanup. I have to create a proper interface to access
@@ -271,9 +289,8 @@ int32_t opal_generic_simple_unpack_function(opal_convertor_t *pConvertor, struct
271289 size_t iov_len_local ;
272290 uint32_t iov_count ;
273291
274- DO_DEBUG (opal_output (0 , "opal_convertor_generic_simple_unpack( %p, {%p, %lu}, %u )\n" ,
275- (void * ) pConvertor , (void * ) iov [0 ].iov_base ,
276- (unsigned long ) iov [0 ].iov_len , * out_size ););
292+ DO_DEBUG ( opal_output ( 0 , "opal_convertor_generic_simple_unpack( %p, iov[%u] = {%p, %lu} )\n" ,
293+ (void * )pConvertor , * out_size , (void * )iov [0 ].iov_base , (unsigned long )iov [0 ].iov_len ); );
277294
278295 description = pConvertor -> use_desc -> desc ;
279296
@@ -300,26 +317,25 @@ int32_t opal_generic_simple_unpack_function(opal_convertor_t *pConvertor, struct
300317 iov_ptr = (unsigned char * ) iov [iov_count ].iov_base ;
301318 iov_len_local = iov [iov_count ].iov_len ;
302319
303- if (0 != pConvertor -> partial_length ) {
304- size_t element_length = opal_datatype_basicDatatypes [pElem -> elem .common .type ]-> size ;
305- size_t missing_length = element_length - pConvertor -> partial_length ;
306-
307- assert (pElem -> elem .common .flags & OPAL_DATATYPE_FLAG_DATA );
308- COMPUTE_CSUM (iov_ptr , missing_length , pConvertor );
309- opal_unpack_partial_datatype (pConvertor , pElem , iov_ptr , pConvertor -> partial_length ,
310- (size_t )(element_length - pConvertor -> partial_length ),
311- & conv_ptr );
312- -- count_desc ;
313- if (0 == count_desc ) {
314- conv_ptr = pConvertor -> pBaseBuf + pStack -> disp ;
315- pos_desc ++ ; /* advance to the next data */
316- UPDATE_INTERNAL_COUNTERS (description , pos_desc , pElem , count_desc );
317- }
318- iov_ptr += missing_length ;
319- iov_len_local -= missing_length ;
320- pConvertor -> partial_length = 0 ; /* nothing more inside */
321- }
320+ /* Deal with all types of partial predefined datatype unpacking, including when
321+ * unpacking a partial predefined element and when unpacking a part smaller than
322+ * the blocklen.
323+ */
322324 if (pElem -> elem .common .flags & OPAL_DATATYPE_FLAG_DATA ) {
325+ if (0 != pConvertor -> partial_length ) { /* partial predefined element */
326+ assert ( pElem -> elem .common .flags & OPAL_DATATYPE_FLAG_DATA );
327+ opal_unpack_partial_predefined ( pConvertor , pElem , & count_desc ,
328+ & iov_ptr , & conv_ptr , & iov_len_local );
329+ if (0 == count_desc ) { /* the end of the vector ? */
330+ assert ( 0 == pConvertor -> partial_length );
331+ conv_ptr = pConvertor -> pBaseBuf + pStack -> disp ;
332+ pos_desc ++ ; /* advance to the next data */
333+ UPDATE_INTERNAL_COUNTERS (description , pos_desc , pElem , count_desc );
334+ goto next_vector ;
335+ }
336+ if ( 0 == iov_len_local )
337+ goto complete_loop ;
338+ }
323339 if (((size_t ) pElem -> elem .count * pElem -> elem .blocklen ) != count_desc ) {
324340 /* we have a partial (less than blocklen) basic datatype */
325341 int rc = UNPACK_PARTIAL_BLOCKLEN (pConvertor , pElem , count_desc , iov_ptr , conv_ptr ,
@@ -336,6 +352,7 @@ int32_t opal_generic_simple_unpack_function(opal_convertor_t *pConvertor, struct
336352 }
337353
338354 while (1 ) {
355+ next_vector :
339356 while (pElem -> elem .common .flags & OPAL_DATATYPE_FLAG_DATA ) {
340357 /* we have a basic datatype (working on full blocks) */
341358 UNPACK_PREDEFINED_DATATYPE (pConvertor , pElem , count_desc , iov_ptr , conv_ptr ,
@@ -401,19 +418,14 @@ int32_t opal_generic_simple_unpack_function(opal_convertor_t *pConvertor, struct
401418 }
402419 }
403420 complete_loop :
404- assert (pElem -> elem .common .type < OPAL_DATATYPE_MAX_PREDEFINED );
405- if (( pElem -> elem .common .flags & OPAL_DATATYPE_FLAG_DATA ) && (0 != iov_len_local )) {
406- unsigned char * temp = conv_ptr ;
421+ assert ( pElem -> elem .common .type < OPAL_DATATYPE_MAX_PREDEFINED );
422+ if ( ( pElem -> elem .common .flags & OPAL_DATATYPE_FLAG_DATA ) && (0 != iov_len_local ) ) {
423+ unsigned char * temp = conv_ptr ;
407424 /* We have some partial data here. Let's copy it into the convertor
408425 * and keep it hot until the next round.
409426 */
410- assert (iov_len_local < opal_datatype_basicDatatypes [pElem -> elem .common .type ]-> size );
411- COMPUTE_CSUM (iov_ptr , iov_len_local , pConvertor );
412-
413- opal_unpack_partial_datatype (pConvertor , pElem , iov_ptr , 0 , iov_len_local , & temp );
414-
415- pConvertor -> partial_length = iov_len_local ;
416- iov_len_local = 0 ;
427+ assert ( iov_len_local < opal_datatype_basicDatatypes [pElem -> elem .common .type ]-> size );
428+ opal_unpack_partial_predefined (pConvertor , pElem , & count_desc , & iov_ptr , & temp , & iov_len_local );
417429 }
418430
419431 iov [iov_count ].iov_len -= iov_len_local ; /* update the amount of valid data */
@@ -543,11 +555,6 @@ int32_t opal_unpack_general_function(opal_convertor_t *pConvertor, struct iovec
543555 unsigned char * conv_ptr , * iov_ptr ;
544556 uint32_t iov_count ;
545557 size_t iov_len_local ;
546- #if 0
547- const opal_convertor_master_t * master = pConvertor -> master ;
548- ptrdiff_t advance ; /* number of bytes that we should advance the buffer */
549- #endif
550- size_t rc ;
551558
552559 DO_DEBUG (opal_output (0 , "opal_convertor_general_unpack( %p, {%p, %lu}, %d )\n" ,
553560 (void * ) pConvertor , (void * ) iov [0 ].iov_base ,
@@ -609,13 +616,9 @@ int32_t opal_unpack_general_function(opal_convertor_t *pConvertor, struct iovec
609616 * and keep it hot until the next round.
610617 */
611618 assert (iov_len_local < opal_datatype_basicDatatypes [pElem -> elem .common .type ]-> size );
612- COMPUTE_CSUM (iov_ptr , iov_len_local , pConvertor );
613-
614- opal_unpack_partial_datatype (pConvertor , pElem , iov_ptr , 0 , iov_len_local ,
615- & temp );
616-
617- pConvertor -> partial_length = iov_len_local ;
618- iov_len_local = 0 ;
619+ opal_unpack_partial_predefined (pConvertor , pElem , & count_desc , & iov_ptr ,
620+ & temp , & iov_len_local );
621+ assert ( 0 == iov_len_local );
619622 }
620623 goto complete_loop ;
621624 }
0 commit comments