2626static int accelerator_cuda_check_addr (const void * addr , int * dev_id , uint64_t * flags );
2727static int accelerator_cuda_create_stream (int dev_id , opal_accelerator_stream_t * * stream );
2828
29- static int accelerator_cuda_create_event (int dev_id , opal_accelerator_event_t * * event );
29+ static int accelerator_cuda_create_event (int dev_id , opal_accelerator_event_t * * event , bool enable_ipc );
3030static int accelerator_cuda_record_event (int dev_id , opal_accelerator_event_t * event , opal_accelerator_stream_t * stream );
3131static int accelerator_cuda_query_event (int dev_id , opal_accelerator_event_t * event );
32+ static int accelerator_cuda_wait_event (int dev_id , opal_accelerator_event_t * event , opal_accelerator_stream_t * stream );
3233
3334static int accelerator_cuda_memcpy_async (int dest_dev_id , int src_dev_id , void * dest , const void * src , size_t size ,
3435 opal_accelerator_stream_t * stream , opal_accelerator_transfer_type_t type );
@@ -44,10 +45,14 @@ static int accelerator_cuda_get_address_range(int dev_id, const void *ptr, void
4445static bool accelerator_cuda_is_ipc_enabled (void );
4546static int accelerator_cuda_get_ipc_handle (int dev_id , void * dev_ptr ,
4647 opal_accelerator_ipc_handle_t * handle );
48+ static int accelerator_cuda_import_ipc_handle (int dev_id , uint8_t ipc_handle [IPC_MAX_HANDLE_SIZE ],
49+ opal_accelerator_ipc_handle_t * handle );
4750static int accelerator_cuda_open_ipc_handle (int dev_id , opal_accelerator_ipc_handle_t * handle ,
4851 void * * dev_ptr );
4952static int accelerator_cuda_get_ipc_event_handle (opal_accelerator_event_t * event ,
5053 opal_accelerator_ipc_event_handle_t * handle );
54+ static int accelerator_cuda_import_ipc_event_handle (uint8_t ipc_handle [IPC_MAX_HANDLE_SIZE ],
55+ opal_accelerator_ipc_event_handle_t * handle );
5156static int accelerator_cuda_open_ipc_event_handle (opal_accelerator_ipc_event_handle_t * handle ,
5257 opal_accelerator_event_t * event );
5358
@@ -60,6 +65,8 @@ static int accelerator_cuda_device_can_access_peer( int *access, int dev1, int d
6065
6166static int accelerator_cuda_get_buffer_id (int dev_id , const void * addr , opal_accelerator_buffer_id_t * buf_id );
6267
/*
 * Resolve an opal_accelerator_stream_t pointer to the CUstream value handed to
 * the CUDA driver calls in this file.
 *
 * MCA_ACCELERATOR_STREAM_DEFAULT yields 0 and is never dereferenced
 * (NOTE(review): 0 is presumably the driver's default stream handle -- confirm
 * against the CUDA driver API documentation). Any other pointer is
 * dereferenced through its ->stream member, which is read as a CUstream *.
 *
 * Every use of the argument is parenthesized so that non-trivial expressions
 * such as `base + 1` or `cond ? a : b` expand correctly. The argument is still
 * evaluated twice, so do not pass an expression with side effects.
 */
#define GET_STREAM(_stream) \
    ((_stream) == MCA_ACCELERATOR_STREAM_DEFAULT ? 0 : *((CUstream *) (_stream)->stream))
69+
6370opal_accelerator_base_module_t opal_accelerator_cuda_module =
6471{
6572 accelerator_cuda_check_addr ,
@@ -69,6 +76,7 @@ opal_accelerator_base_module_t opal_accelerator_cuda_module =
6976 accelerator_cuda_create_event ,
7077 accelerator_cuda_record_event ,
7178 accelerator_cuda_query_event ,
79+ accelerator_cuda_wait_event ,
7280
7381 accelerator_cuda_memcpy_async ,
7482 accelerator_cuda_memcpy ,
@@ -79,8 +87,10 @@ opal_accelerator_base_module_t opal_accelerator_cuda_module =
7987
8088 accelerator_cuda_is_ipc_enabled ,
8189 accelerator_cuda_get_ipc_handle ,
90+ accelerator_cuda_import_ipc_handle ,
8291 accelerator_cuda_open_ipc_handle ,
8392 accelerator_cuda_get_ipc_event_handle ,
93+ accelerator_cuda_import_ipc_event_handle ,
8494 accelerator_cuda_open_ipc_event_handle ,
8595
8696 accelerator_cuda_host_register ,
@@ -260,7 +270,8 @@ static void opal_accelerator_cuda_stream_destruct(opal_accelerator_cuda_stream_t
260270{
261271 CUresult result ;
262272
263- if (NULL != stream -> base .stream ) {
273+ if (MCA_ACCELERATOR_STREAM_DEFAULT != (opal_accelerator_stream_t * )stream &&
274+ NULL != stream -> base .stream ) {
264275 result = cuStreamDestroy (* (CUstream * )stream -> base .stream );
265276 if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
266277 opal_show_help ("help-accelerator-cuda.txt" , "cuStreamDestroy failed" , true,
@@ -276,7 +287,7 @@ OBJ_CLASS_INSTANCE(
276287 NULL ,
277288 opal_accelerator_cuda_stream_destruct );
278289
279- static int accelerator_cuda_create_event (int dev_id , opal_accelerator_event_t * * event )
290+ static int accelerator_cuda_create_event (int dev_id , opal_accelerator_event_t * * event , bool enable_ipc )
280291{
281292 CUresult result ;
282293 int delayed_init = opal_accelerator_cuda_delayed_init ();
@@ -294,7 +305,8 @@ static int accelerator_cuda_create_event(int dev_id, opal_accelerator_event_t **
294305 OBJ_RELEASE (* event );
295306 return OPAL_ERR_OUT_OF_RESOURCE ;
296307 }
297- result = cuEventCreate ((* event )-> event , CU_EVENT_DISABLE_TIMING );
308+ result = cuEventCreate ((* event )-> event , enable_ipc ? CU_EVENT_DISABLE_TIMING |CU_EVENT_INTERPROCESS :
309+ CU_EVENT_DISABLE_TIMING );
298310 if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
299311 opal_show_help ("help-accelerator-cuda.txt" , "cuEventCreate failed" , true,
300312 OPAL_PROC_MY_HOSTNAME , result );
@@ -328,11 +340,13 @@ static int accelerator_cuda_record_event(int dev_id, opal_accelerator_event_t *e
328340{
329341 CUresult result ;
330342
331- if (NULL == stream || NULL == event ) {
343+ if ((MCA_ACCELERATOR_STREAM_DEFAULT != stream &&
344+ (NULL == stream || NULL == stream -> stream )) ||
345+ NULL == event ) {
332346 return OPAL_ERR_BAD_PARAM ;
333347 }
334348
335- result = cuEventRecord (* (CUevent * )event -> event , * ( CUstream * ) stream -> stream );
349+ result = cuEventRecord (* (CUevent * )event -> event , GET_STREAM ( stream ) );
336350 if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
337351 opal_show_help ("help-accelerator-cuda.txt" , "cuEventRecord failed" , true,
338352 OPAL_PROC_MY_HOSTNAME , result );
@@ -369,6 +383,10 @@ static int accelerator_cuda_query_event(int dev_id, opal_accelerator_event_t *ev
369383 }
370384 }
371385}
386+ static int accelerator_cuda_wait_event (int dev_id , opal_accelerator_event_t * event , opal_accelerator_stream_t * stream )
387+ {
388+ return OPAL_ERR_NOT_IMPLEMENTED ;
389+ }
372390
373391static int accelerator_cuda_memcpy_async (int dest_dev_id , int src_dev_id , void * dest , const void * src , size_t size ,
374392 opal_accelerator_stream_t * stream , opal_accelerator_transfer_type_t type )
@@ -380,11 +398,12 @@ static int accelerator_cuda_memcpy_async(int dest_dev_id, int src_dev_id, void *
380398 return delayed_init ;
381399 }
382400
383- if (NULL == stream || NULL == dest || NULL == src || size <= 0 ) {
401+ if ((MCA_ACCELERATOR_STREAM_DEFAULT != stream && NULL == stream ) ||
402+ NULL == dest || NULL == src || size <= 0 ) {
384403 return OPAL_ERR_BAD_PARAM ;
385404 }
386405
387- result = cuMemcpyAsync ((CUdeviceptr ) dest , (CUdeviceptr ) src , size , * ( CUstream * ) stream -> stream );
406+ result = cuMemcpyAsync ((CUdeviceptr ) dest , (CUdeviceptr ) src , size , GET_STREAM ( stream ) );
388407 if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
389408 opal_show_help ("help-accelerator-cuda.txt" , "cuMemcpyAsync failed" , true, dest , src ,
390409 size , result );
@@ -547,6 +566,12 @@ static int accelerator_cuda_get_ipc_handle(int dev_id, void *dev_ptr,
547566 return OPAL_ERR_NOT_IMPLEMENTED ;
548567}
549568
569+ static int accelerator_cuda_import_ipc_handle (int dev_id , uint8_t ipc_handle [IPC_MAX_HANDLE_SIZE ],
570+ opal_accelerator_ipc_handle_t * handle )
571+ {
572+ return OPAL_ERR_NOT_IMPLEMENTED ;
573+ }
574+
550575static int accelerator_cuda_open_ipc_handle (int dev_id , opal_accelerator_ipc_handle_t * handle ,
551576 void * * dev_ptr )
552577{
@@ -559,6 +584,12 @@ static int accelerator_cuda_get_ipc_event_handle(opal_accelerator_event_t *event
559584 return OPAL_ERR_NOT_IMPLEMENTED ;
560585}
561586
587+ static int accelerator_cuda_import_ipc_event_handle (uint8_t ipc_handle [IPC_MAX_HANDLE_SIZE ],
588+ opal_accelerator_ipc_event_handle_t * handle )
589+ {
590+ return OPAL_ERR_NOT_IMPLEMENTED ;
591+ }
592+
562593static int accelerator_cuda_open_ipc_event_handle (opal_accelerator_ipc_event_handle_t * handle ,
563594 opal_accelerator_event_t * event )
564595{
0 commit comments