@@ -195,14 +195,18 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
195195 return 0 ;
196196 }
197197 }
198-
198+ /* First access on a device pointer finalizes CUDA support initialization. */
199+ opal_accelerator_cuda_delayed_init ();
199200 return 1 ;
200201}
201202
202203static int accelerator_cuda_create_stream (int dev_id , opal_accelerator_stream_t * * stream )
203204{
204205 CUresult result ;
205-
206+ int delayed_init = opal_accelerator_cuda_delayed_init ();
207+ if (OPAL_UNLIKELY (0 != delayed_init )) {
208+ return delayed_init ;
209+ }
206210 * stream = (opal_accelerator_stream_t * )OBJ_NEW (opal_accelerator_cuda_stream_t );
207211 if (NULL == * stream ) {
208212 return OPAL_ERR_OUT_OF_RESOURCE ;
@@ -248,6 +252,10 @@ OBJ_CLASS_INSTANCE(
248252static int accelerator_cuda_create_event (int dev_id , opal_accelerator_event_t * * event )
249253{
250254 CUresult result ;
255+ int delayed_init = opal_accelerator_cuda_delayed_init ();
256+ if (OPAL_UNLIKELY (0 != delayed_init )) {
257+ return delayed_init ;
258+ }
251259
252260 * event = (opal_accelerator_event_t * )OBJ_NEW (opal_accelerator_cuda_event_t );
253261 if (NULL == * event ) {
@@ -340,6 +348,11 @@ static int accelerator_cuda_memcpy_async(int dest_dev_id, int src_dev_id, void *
340348{
341349 CUresult result ;
342350
351+ int delayed_init = opal_accelerator_cuda_delayed_init ();
352+ if (OPAL_UNLIKELY (0 != delayed_init )) {
353+ return delayed_init ;
354+ }
355+
343356 if (NULL == stream || NULL == dest || NULL == src || size <= 0 ) {
344357 return OPAL_ERR_BAD_PARAM ;
345358 }
@@ -358,6 +371,11 @@ static int accelerator_cuda_memcpy(int dest_dev_id, int src_dev_id, void *dest,
358371{
359372 CUresult result ;
360373
374+ int delayed_init = opal_accelerator_cuda_delayed_init ();
375+ if (OPAL_UNLIKELY (0 != delayed_init )) {
376+ return delayed_init ;
377+ }
378+
361379 if (NULL == dest || NULL == src || size <= 0 ) {
362380 return OPAL_ERR_BAD_PARAM ;
363381 }
@@ -391,6 +409,11 @@ static int accelerator_cuda_memmove(int dest_dev_id, int src_dev_id, void *dest,
391409 CUdeviceptr tmp ;
392410 CUresult result ;
393411
412+ int delayed_init = opal_accelerator_cuda_delayed_init ();
413+ if (OPAL_UNLIKELY (0 != delayed_init )) {
414+ return delayed_init ;
415+ }
416+
394417 if (NULL == dest || NULL == src || size <= 0 ) {
395418 return OPAL_ERR_BAD_PARAM ;
396419 }
@@ -425,6 +448,11 @@ static int accelerator_cuda_mem_alloc(int dev_id, void **ptr, size_t size)
425448{
426449 CUresult result ;
427450
451+ int delayed_init = opal_accelerator_cuda_delayed_init ();
452+ if (OPAL_UNLIKELY (0 != delayed_init )) {
453+ return delayed_init ;
454+ }
455+
428456 if (NULL == ptr || 0 == size ) {
429457 return OPAL_ERR_BAD_PARAM ;
430458 }
@@ -434,7 +462,7 @@ static int accelerator_cuda_mem_alloc(int dev_id, void **ptr, size_t size)
434462 if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
435463 opal_show_help ("help-accelerator-cuda.txt" , "cuMemAlloc failed" , true,
436464 OPAL_PROC_MY_HOSTNAME , result );
437- return result ;
465+ return OPAL_ERROR ;
438466 }
439467 }
440468 return 0 ;
@@ -448,7 +476,7 @@ static int accelerator_cuda_mem_release(int dev_id, void *ptr)
448476 if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
449477 opal_show_help ("help-accelerator-cuda.txt" , "cuMemFree failed" , true,
450478 OPAL_PROC_MY_HOSTNAME , result );
451- return result ;
479+ return OPAL_ERROR ;
452480 }
453481 }
454482 return 0 ;
@@ -459,6 +487,11 @@ static int accelerator_cuda_get_address_range(int dev_id, const void *ptr, void
459487{
460488 CUresult result ;
461489
490+ int delayed_init = opal_accelerator_cuda_delayed_init ();
491+ if (OPAL_UNLIKELY (0 != delayed_init )) {
492+ return delayed_init ;
493+ }
494+
462495 if (NULL == ptr || NULL == base || NULL == size ) {
463496 return OPAL_ERR_BAD_PARAM ;
464497 }
@@ -479,6 +512,11 @@ static int accelerator_cuda_get_address_range(int dev_id, const void *ptr, void
479512static int accelerator_cuda_host_register (int dev_id , void * ptr , size_t size )
480513{
481514 CUresult result ;
515+ int delayed_init = opal_accelerator_cuda_delayed_init ();
516+ if (OPAL_UNLIKELY (0 != delayed_init )) {
517+ return delayed_init ;
518+ }
519+
482520 if (NULL == ptr && size > 0 ) {
483521 return OPAL_ERR_BAD_PARAM ;
484522 }
@@ -487,7 +525,7 @@ static int accelerator_cuda_host_register(int dev_id, void *ptr, size_t size)
487525 if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
488526 opal_show_help ("help-accelerator-cuda.txt" , "cuMemHostRegister failed" , true,
489527 ptr , size , OPAL_PROC_MY_HOSTNAME , result );
490- return result ;
528+ return OPAL_ERROR ;
491529 }
492530
493531 return OPAL_SUCCESS ;
@@ -501,7 +539,7 @@ static int accelerator_cuda_host_unregister(int dev_id, void *ptr)
501539 if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
502540 opal_show_help ("help-accelerator-cuda.txt" , "cuMemHostUnregister failed" , true,
503541 ptr , OPAL_PROC_MY_HOSTNAME , result );
504- return result ;
542+ return OPAL_ERROR ;
505543 }
506544 }
507545 return OPAL_SUCCESS ;
@@ -512,6 +550,11 @@ static int accelerator_cuda_get_device(int *dev_id)
512550 CUdevice cuDev ;
513551 CUresult result ;
514552
553+ int delayed_init = opal_accelerator_cuda_delayed_init ();
554+ if (OPAL_UNLIKELY (0 != delayed_init )) {
555+ return delayed_init ;
556+ }
557+
515558 if (NULL == dev_id ) {
516559 return OPAL_ERR_BAD_PARAM ;
517560 }
@@ -520,7 +563,7 @@ static int accelerator_cuda_get_device(int *dev_id)
520563 if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
521564 opal_show_help ("help-accelerator-cuda.txt" , "cuCtxGetDevice failed" , true,
522565 result );
523- return result ;
566+ return OPAL_ERROR ;
524567 }
525568 * dev_id = cuDev ;
526569 return 0 ;
@@ -530,6 +573,11 @@ static int accelerator_cuda_device_can_access_peer(int *access, int dev1, int de
530573{
531574 CUresult result ;
532575
576+ int delayed_init = opal_accelerator_cuda_delayed_init ();
577+ if (OPAL_UNLIKELY (0 != delayed_init )) {
578+ return delayed_init ;
579+ }
580+
533581 if (NULL == access ) {
534582 return OPAL_ERR_BAD_PARAM ;
535583 }
@@ -538,7 +586,7 @@ static int accelerator_cuda_device_can_access_peer(int *access, int dev1, int de
538586 if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
539587 opal_show_help ("help-accelerator-cuda.txt" , "cuDeviceCanAccessPeer failed" , true,
540588 OPAL_PROC_MY_HOSTNAME , result );
541- return result ;
589+ return OPAL_ERROR ;
542590 }
543591 return 0 ;
544592}
@@ -554,18 +602,24 @@ static int accelerator_cuda_get_buffer_id(int dev_id, const void *addr, opal_acc
554602{
555603 CUresult result ;
556604 int enable = 1 ;
605+
606+ int delayed_init = opal_accelerator_cuda_delayed_init ();
607+ if (OPAL_UNLIKELY (0 != delayed_init )) {
608+ return delayed_init ;
609+ }
610+
557611 result = cuPointerGetAttribute ((unsigned long long * )buf_id , CU_POINTER_ATTRIBUTE_BUFFER_ID , (CUdeviceptr ) addr );
558612 if (OPAL_UNLIKELY (result != CUDA_SUCCESS )) {
559613 opal_show_help ("help-accelerator-cuda.txt" , "bufferID failed" , true, OPAL_PROC_MY_HOSTNAME ,
560614 result );
561- return result ;
615+ return OPAL_ERROR ;
562616 }
563617 result = cuPointerSetAttribute (& enable , CU_POINTER_ATTRIBUTE_SYNC_MEMOPS ,
564618 (CUdeviceptr ) addr );
565619 if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
566620 opal_show_help ("help-accelerator-cuda.txt" , "cuPointerSetAttribute failed" , true,
567621 OPAL_PROC_MY_HOSTNAME , result , addr );
568- return result ;
622+ return OPAL_ERROR ;
569623 }
570624 return OPAL_SUCCESS ;
571625}
0 commit comments