33 * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
44 * University Research and Technology
55 * Corporation. All rights reserved.
6- * Copyright (c) 2004-2014 The University of Tennessee and The University
6+ * Copyright (c) 2004-2019 The University of Tennessee and The University
77 * of Tennessee Research Foundation. All rights
88 * reserved.
99 * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
@@ -123,11 +123,18 @@ position_predefined_data( opal_convertor_t* CONVERTOR,
123123 do_now = cando_count / _elem -> blocklen ;
124124 if ( 0 != do_now ) {
125125 do_now_bytes = _elem -> blocklen * opal_datatype_basicDatatypes [_elem -> common .type ]-> size ;
126+ #if OPAL_ENABLE_DEBUG
126127 for (size_t _i = 0 ; _i < do_now ; _i ++ ) {
127128 position_single_block ( CONVERTOR , & _memory , _elem -> extent ,
128129 SPACE , do_now_bytes , COUNT , _elem -> blocklen );
129130 cando_count -= _elem -> blocklen ;
130131 }
132+ #else
133+ _memory += do_now * _elem -> extent ;
134+ * SPACE -= do_now * do_now_bytes ;
135+ * COUNT -= do_now * _elem -> blocklen ;
136+ cando_count -= do_now * _elem -> blocklen ;
137+ #endif /* OPAL_ENABLE_DEBUG */
131138 }
132139
133140 /**
@@ -144,48 +151,16 @@ position_predefined_data( opal_convertor_t* CONVERTOR,
144151 * (POINTER ) = _memory - _elem -> disp ;
145152}
146153
147- /**
148- * Advance the convertor position over whole iterations of a contiguous loop.
149- * Skips min(*COUNT, *SPACE / _end_loop->size) iterations: *POINTER moves by the loop
150- * extent, *SPACE drops by _end_loop->size and *COUNT by one, per skipped iteration.
151- */
152- static inline void
153- position_contiguous_loop ( opal_convertor_t * CONVERTOR ,
154- dt_elem_desc_t * ELEM ,
155- size_t * COUNT ,
156- unsigned char * * POINTER ,
157- size_t * SPACE )
158- {
159- ddt_loop_desc_t * _loop = (ddt_loop_desc_t * )(ELEM );
160- ddt_endloop_desc_t * _end_loop = (ddt_endloop_desc_t * )((ELEM ) + (ELEM )-> loop .items ); /* end-of-loop descriptor sits loop.items entries after ELEM */
161- size_t _copy_loops = * (COUNT ); /* start by assuming every remaining iteration fits */
162-
163- if ( (_copy_loops * _end_loop -> size ) > * (SPACE ) )
164- _copy_loops = * (SPACE ) / _end_loop -> size ; /* clamp to the iterations that fit in *SPACE */
165- OPAL_DATATYPE_SAFEGUARD_POINTER ( * (POINTER ) + _end_loop -> first_elem_disp ,
166- (_copy_loops - 1 ) * _loop -> extent + _end_loop -> size ,
167- (CONVERTOR )-> pBaseBuf , (CONVERTOR )-> pDesc , (CONVERTOR )-> count ); /* NOTE(review): when _copy_loops is 0, (_copy_loops - 1) wraps as size_t - confirm the safeguard tolerates this */
168- * (POINTER ) += _copy_loops * _loop -> extent ;
169- * (SPACE ) -= _copy_loops * _end_loop -> size ;
170- * (COUNT ) -= _copy_loops ;
171- }
172-
173- #define POSITION_PREDEFINED_DATATYPE ( CONVERTOR , ELEM , COUNT , POSITION , SPACE ) \
174- position_predefined_data( (CONVERTOR), (ELEM), &(COUNT), &(POSITION), &(SPACE) ) /* forwards COUNT, POSITION and SPACE by address */
175-
176- #define POSITION_CONTIGUOUS_LOOP ( CONVERTOR , ELEM , COUNT , POSITION , SPACE ) \
177- position_contiguous_loop( (CONVERTOR), (ELEM), &(COUNT), &(POSITION), &(SPACE) ) /* forwards COUNT, POSITION and SPACE by address */
178-
179154int opal_convertor_generic_simple_position ( opal_convertor_t * pConvertor ,
180155 size_t * position )
181156{
182157 dt_stack_t * pStack ; /* pointer to the position on the stack */
183158 uint32_t pos_desc ; /* actual position in the description of the derived datatype */
184159 size_t count_desc ; /* the number of items already done in the actual pos_desc */
160+ size_t iov_len_local ;
185161 dt_elem_desc_t * description = pConvertor -> use_desc -> desc ;
186162 dt_elem_desc_t * pElem ; /* current position */
187163 unsigned char * base_pointer = pConvertor -> pBaseBuf ;
188- size_t iov_len_local ;
189164 ptrdiff_t extent = pConvertor -> pDesc -> ub - pConvertor -> pDesc -> lb ;
190165
191166 DUMP ( "opal_convertor_generic_simple_position( %p, &%ld )\n" , (void * )pConvertor , (long )* position );
@@ -236,21 +211,19 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
236211 assert (pConvertor -> partial_length < element_length );
237212 return 0 ;
238213 }
239- pConvertor -> partial_length = (pConvertor -> partial_length + missing_length ) % element_length ;
240- assert (pConvertor -> partial_length == 0 );
214+ pConvertor -> partial_length = 0 ;
241215 pConvertor -> bConverted += missing_length ;
242216 iov_len_local -= missing_length ;
243217 count_desc -- ;
244218 }
245219 while ( 1 ) {
246- if ( OPAL_DATATYPE_END_LOOP == pElem -> elem .common .type ) { /* end of the current loop */
220+ if ( OPAL_DATATYPE_END_LOOP == pElem -> elem .common .type ) { /* end of the entire datatype */
247221 DO_DEBUG ( opal_output ( 0 , "position end_loop count %" PRIsize_t " stack_pos %d pos_desc %d disp %lx space %lu\n" ,
248222 pStack -> count , pConvertor -> stack_pos , pos_desc ,
249223 pStack -> disp , (unsigned long )iov_len_local ); );
250224 if ( -- (pStack -> count ) == 0 ) { /* end of loop */
251225 if ( pConvertor -> stack_pos == 0 ) {
252226 pConvertor -> flags |= CONVERTOR_COMPLETED ;
253- pConvertor -> partial_length = 0 ;
254227 goto complete_loop ; /* completed */
255228 }
256229 pConvertor -> stack_pos -- ;
@@ -259,11 +232,13 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
259232 } else {
260233 if ( pStack -> index == -1 ) {
261234 pStack -> disp += extent ;
235+ pos_desc = 0 ; /* back to the first element */
262236 } else {
263237 assert ( OPAL_DATATYPE_LOOP == description [pStack -> index ].loop .common .type );
264238 pStack -> disp += description [pStack -> index ].loop .extent ;
239+ pos_desc = pStack -> index ; /* go back to the loop start itself to give a chance
240+ * to move forward by entire loops */
265241 }
266- pos_desc = pStack -> index + 1 ;
267242 }
268243 base_pointer = pConvertor -> pBaseBuf + pStack -> disp ;
269244 UPDATE_INTERNAL_COUNTERS ( description , pos_desc , pElem , count_desc );
@@ -273,9 +248,14 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
273248 }
274249 if ( OPAL_DATATYPE_LOOP == pElem -> elem .common .type ) {
275250 ptrdiff_t local_disp = (ptrdiff_t )base_pointer ;
276- if ( pElem -> loop .common .flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
277- POSITION_CONTIGUOUS_LOOP ( pConvertor , pElem , count_desc ,
278- base_pointer , iov_len_local );
251+ ddt_endloop_desc_t * end_loop = (ddt_endloop_desc_t * )(pElem + pElem -> loop .items );
252+ size_t full_loops = iov_len_local / end_loop -> size ;
253+ full_loops = count_desc <= full_loops ? count_desc : full_loops ;
254+ if ( full_loops ) {
255+ base_pointer += full_loops * pElem -> loop .extent ;
256+ iov_len_local -= full_loops * end_loop -> size ;
257+ count_desc -= full_loops ;
258+
279259 if ( 0 == count_desc ) { /* completed */
280260 pos_desc += pElem -> loop .items + 1 ;
281261 goto update_loop_description ;
@@ -297,8 +277,7 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
297277 }
298278 while ( pElem -> elem .common .flags & OPAL_DATATYPE_FLAG_DATA ) {
299279 /* now here we have a basic datatype */
300- POSITION_PREDEFINED_DATATYPE ( pConvertor , pElem , count_desc ,
301- base_pointer , iov_len_local );
280+ position_predefined_data ( pConvertor , pElem , & count_desc , & base_pointer , & iov_len_local );
302281 if ( 0 != count_desc ) { /* completed */
303282 pConvertor -> partial_length = iov_len_local ;
304283 goto complete_loop ;
0 commit comments