@@ -58,12 +58,13 @@ static mca_fbtl_base_module_1_0_0_t posix = {
5858#if defined (FBTL_POSIX_HAVE_AIO )
5959 mca_fbtl_posix_ipwritev , /* non-blocking write */
6060 mca_fbtl_posix_progress , /* module specific progress */
61- mca_fbtl_posix_request_free /* free module specific data items on the request */
61+ mca_fbtl_posix_request_free , /* free module specific data items on the request */
6262#else
6363 NULL , /* non-blocking write */
6464 NULL , /* module specific progress */
65- NULL /* free module specific data items on the request */
65+ NULL , /* free module specific data items on the request */
6666#endif
67+ mca_fbtl_posix_check_atomicity /* check whether atomicity is supported on this fs */
6768};
6869/*
6970 * *******************************************************************
@@ -144,34 +145,38 @@ bool mca_fbtl_posix_progress ( mca_ompio_request_t *req)
144145 data -> aio_req_status [i ] = EINPROGRESS ;
145146 start_offset = data -> aio_reqs [i ].aio_offset ;
146147 total_length = data -> aio_reqs [i ].aio_nbytes ;
148+ /* release previous lock */
149+ mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh , & data -> aio_lock_counter );
150+
147151 if ( data -> aio_req_type == FBTL_POSIX_WRITE ) {
148- ret_code = mca_fbtl_posix_lock ( & data -> aio_lock , data -> aio_fh , F_WRLCK , start_offset , total_length , OMPIO_LOCK_ENTIRE_REGION );
152+ ret_code = mca_fbtl_posix_lock ( & data -> aio_lock , data -> aio_fh , F_WRLCK , start_offset , total_length ,
153+ OMPIO_LOCK_ENTIRE_REGION , & data -> aio_lock_counter );
149154 if ( 0 < ret_code ) {
150155 opal_output (1 , "mca_fbtl_posix_progress: error in mca_fbtl_posix_lock() %d" , ret_code );
151156 /* Just in case some part of the lock actually succeeded. */
152- mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh );
157+ mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh , & data -> aio_lock_counter );
153158 return OMPI_ERROR ;
154159 }
155160 if (-1 == aio_write (& data -> aio_reqs [i ])) {
156161 opal_output (1 , "mca_fbtl_posix_progress: error in aio_write()" );
157- mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh );
162+ mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh , & data -> aio_lock_counter );
158163 return OMPI_ERROR ;
159164 }
160165 }
161166 else if ( data -> aio_req_type == FBTL_POSIX_READ ) {
162- ret_code = mca_fbtl_posix_lock ( & data -> aio_lock , data -> aio_fh , F_RDLCK , start_offset , total_length , OMPIO_LOCK_ENTIRE_REGION );
167+ ret_code = mca_fbtl_posix_lock ( & data -> aio_lock , data -> aio_fh , F_RDLCK , start_offset , total_length ,
168+ OMPIO_LOCK_ENTIRE_REGION , & data -> aio_lock_counter );
163169 if ( 0 < ret_code ) {
164170 opal_output (1 , "mca_fbtl_posix_progress: error in mca_fbtl_posix_lock() %d" , ret_code );
165171 /* Just in case some part of the lock actually succeeded. */
166- mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh );
172+ mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh , & data -> aio_lock_counter );
167173 return OMPI_ERROR ;
168174 }
169175 if (-1 == aio_read (& data -> aio_reqs [i ])) {
170176 opal_output (1 , "mca_fbtl_posix_progress: error in aio_read()" );
171- mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh );
177+ mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh , & data -> aio_lock_counter );
172178 return OMPI_ERROR ;
173179 }
174- mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh );
175180 }
176181 }
177182 else {
@@ -199,10 +204,9 @@ bool mca_fbtl_posix_progress ( mca_ompio_request_t *req)
199204#if 0
200205 printf ("lcount=%d open_reqs=%d\n" , lcount , data -> aio_open_reqs );
201206#endif
202-
203207 if ( (lcount == data -> aio_req_chunks ) && (0 != data -> aio_open_reqs )) {
204208 /* release the lock of the previous operations */
205- mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh );
209+ mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh , & data -> aio_lock_counter );
206210
207211 /* post the next batch of operations */
208212 data -> aio_first_active_req = data -> aio_last_active_req ;
@@ -218,30 +222,32 @@ bool mca_fbtl_posix_progress ( mca_ompio_request_t *req)
218222 total_length = (end_offset - start_offset );
219223
220224 if ( FBTL_POSIX_READ == data -> aio_req_type ) {
221- ret_code = mca_fbtl_posix_lock ( & data -> aio_lock , data -> aio_fh , F_RDLCK , start_offset , total_length , OMPIO_LOCK_ENTIRE_REGION );
225+ ret_code = mca_fbtl_posix_lock ( & data -> aio_lock , data -> aio_fh , F_RDLCK , start_offset , total_length ,
226+ OMPIO_LOCK_ENTIRE_REGION , & data -> aio_lock_counter );
222227 }
223228 else if ( FBTL_POSIX_WRITE == data -> aio_req_type ) {
224- ret_code = mca_fbtl_posix_lock ( & data -> aio_lock , data -> aio_fh , F_WRLCK , start_offset , total_length , OMPIO_LOCK_ENTIRE_REGION );
229+ ret_code = mca_fbtl_posix_lock ( & data -> aio_lock , data -> aio_fh , F_WRLCK , start_offset , total_length ,
230+ OMPIO_LOCK_ENTIRE_REGION , & data -> aio_lock_counter );
225231 }
226232 if ( 0 < ret_code ) {
227233 opal_output (1 , "mca_fbtl_posix_progress: error in mca_fbtl_posix_lock() %d" , ret_code );
228234 /* Just in case some part of the lock actually succeeded. */
229- mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh );
235+ mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh , & data -> aio_lock_counter );
230236 return OMPI_ERROR ;
231237 }
232238
233239 for ( i = data -> aio_first_active_req ; i < data -> aio_last_active_req ; i ++ ) {
234240 if ( FBTL_POSIX_READ == data -> aio_req_type ) {
235241 if (-1 == aio_read (& data -> aio_reqs [i ])) {
236242 opal_output (1 , "mca_fbtl_posix_progress: error in aio_read()" );
237- mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh );
243+ mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh , & data -> aio_lock_counter );
238244 return OMPI_ERROR ;
239245 }
240246 }
241247 else if ( FBTL_POSIX_WRITE == data -> aio_req_type ) {
242248 if (-1 == aio_write (& data -> aio_reqs [i ])) {
243249 opal_output (1 , "mca_fbtl_posix_progress: error in aio_write()" );
244- mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh );
250+ mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh , & data -> aio_lock_counter );
245251 return OMPI_ERROR ;
246252 }
247253 }
@@ -255,8 +261,13 @@ bool mca_fbtl_posix_progress ( mca_ompio_request_t *req)
255261 /* all pending operations are finished for this request */
256262 req -> req_ompi .req_status .MPI_ERROR = OMPI_SUCCESS ;
257263 req -> req_ompi .req_status ._ucount = data -> aio_total_len ;
258- mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh );
259- ret = true;
264+ mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh , & data -> aio_lock_counter );
265+
266+ if ( data -> aio_fh -> f_atomicity ) {
267+ mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh , & data -> aio_lock_counter );
268+ }
269+
270+ ret = true;
260271 }
261272#endif
262273 return ret ;
@@ -268,8 +279,8 @@ void mca_fbtl_posix_request_free ( mca_ompio_request_t *req)
268279 /* Free the fbtl specific data structures */
269280 mca_fbtl_posix_request_data_t * data = (mca_fbtl_posix_request_data_t * )req -> req_data ;
270281 if (NULL != data ) {
271- mca_fbtl_posix_unlock ( & data -> aio_lock , data -> aio_fh );
272- if ( NULL != data -> aio_reqs ) {
282+
283+ if ( NULL != data -> aio_reqs ) {
273284 free ( data -> aio_reqs );
274285 }
275286 if ( NULL != data -> aio_req_status ) {
@@ -281,3 +292,27 @@ void mca_fbtl_posix_request_free ( mca_ompio_request_t *req)
281292#endif
282293 return ;
283294}
295+
296+ bool mca_fbtl_posix_check_atomicity ( ompio_file_t * file )
297+ {
298+ struct flock lock ;
299+
300+ lock .l_type = F_WRLCK ;
301+ lock .l_whence = SEEK_SET ;
302+ lock .l_start = 0 ;
303+ lock .l_len = 0 ;
304+ lock .l_pid = 0 ;
305+
306+ if (fcntl (file -> fd , F_GETLK , & lock ) < 0 )
307+ {
308+ #ifdef VERBOSE
309+ printf ("Failed to get lock info for '%s': %s\n" , filename , strerror (errno ));
310+ #endif
311+ return false;
312+ }
313+
314+ #ifdef VERBOSE
315+ printf ("Lock would have worked, l_type=%d\n" , (int )lock .l_type );
316+ #endif
317+ return true;
318+ }
0 commit comments