@@ -47,24 +47,32 @@ static int opal_common_ofi_init_ref_cnt = 0;
4747
4848#ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR
4949
/*
 * These no-op functions are necessary because libfabric does not allow NULL
 * function pointers here.
 */
/**
 * No-op "start" callback for a struct fid_mem_monitor.
 *
 * Exists only so that a non-NULL function pointer can be supplied; it
 * performs no work.
 *
 * @param monitor  Memory monitor being started (unused).
 *
 * @return Always 0.
 */
static int opal_common_ofi_monitor_start(struct fid_mem_monitor *monitor)
{
    (void) monitor; /* intentionally unused */
    return 0;
}
58+
/**
 * No-op "stop" callback for a struct fid_mem_monitor.
 *
 * Exists only so that a non-NULL function pointer can be supplied; it
 * performs no work and has nothing to report.
 *
 * @param monitor  Memory monitor being stopped (unused).
 */
static void opal_common_ofi_monitor_stop(struct fid_mem_monitor *monitor)
{
    (void) monitor; /* intentionally unused */
}
63+
/**
 * No-op "subscribe" callback for a struct fid_mem_monitor.
 *
 * Accepts any address range and reports success without recording it.
 *
 * @param monitor  Memory monitor receiving the subscription (unused).
 * @param addr     Start of the address range (unused).
 * @param len      Length of the address range in bytes (unused).
 *
 * @return Always 0.
 */
static int opal_common_ofi_monitor_subscribe(struct fid_mem_monitor *monitor,
                                             const void *addr, size_t len)
{
    /* Intentionally unused: this callback is a placeholder. */
    (void) monitor;
    (void) addr;
    (void) len;
    return 0;
}
69+
/**
 * No-op "unsubscribe" callback for a struct fid_mem_monitor.
 *
 * Accepts any address range and does nothing with it.
 *
 * @param monitor  Memory monitor losing the subscription (unused).
 * @param addr     Start of the address range (unused).
 * @param len      Length of the address range in bytes (unused).
 */
static void opal_common_ofi_monitor_unsubscribe(struct fid_mem_monitor *monitor,
                                                const void *addr, size_t len)
{
    /* Intentionally unused: this callback is a placeholder. */
    (void) monitor;
    (void) addr;
    (void) len;
}
75+
6876static bool opal_common_ofi_monitor_valid (struct fid_mem_monitor * monitor ,
6977 const void * addr , size_t len )
7078{
@@ -88,6 +96,7 @@ static void opal_common_ofi_mem_release_cb(void *buf, size_t length,
8896 opal_common_ofi_monitor -> import_ops -> notify (opal_common_ofi_monitor ,
8997 buf , length );
9098}
99+
91100#endif /* HAVE_STRUCT_FI_OPS_MEM_MONITOR */
92101
93102int opal_common_ofi_init (void )
@@ -107,6 +116,12 @@ int opal_common_ofi_init(void)
107116 return OPAL_SUCCESS ;
108117 }
109118
    /*
     * This cache object doesn't do much, but it is required in order to call
     * the fi_import_fid API. That API was introduced in libfabric version
     * 1.13.0, and "mr_cache" is a "well known" name (documented in libfabric)
     * that indicates the type of object we are trying to open.
     */
110125 ret = fi_open (FI_VERSION (1 ,13 ), "mr_cache" , NULL , 0 , 0 , & opal_common_ofi_cache_fid , NULL );
111126 if (ret ) {
112127 goto err ;
@@ -119,6 +134,13 @@ int opal_common_ofi_init(void)
119134
120135 opal_common_ofi_monitor -> fid .fclass = FI_CLASS_MEM_MONITOR ;
121136 opal_common_ofi_monitor -> export_ops = & opal_common_ofi_export_ops ;
    /*
     * This fi_import_fid call must occur before the libfabric provider creates
     * its memory registration cache. The cache is a domain-level object, so it
     * is typically created during domain open. We do the import early in
     * initialization to guarantee this ordering and to share the imported
     * monitor between the ofi btl and ofi mtl.
     */
122144 ret = fi_import_fid (opal_common_ofi_cache_fid , & opal_common_ofi_monitor -> fid , 0 );
123145 if (ret ) {
124146 goto err ;
@@ -491,61 +513,6 @@ static uint32_t get_package_rank(opal_process_info_t *process_info)
491513 return (uint32_t ) package_ranks [process_info -> my_local_rank ];
492514}
493515
494- /* Selects a NIC based on hardware locality between process cpuset and device BDF.
495- *
496- * Initializes opal_hwloc_topology to access hardware topology if not previously
497- * initialized
498- *
499- * There are 3 main cases that this covers:
500- *
501- * 1. If the first provider passed into this function is the only valid
502- * provider, this provider is returned.
503- *
504- * 2. If there is more than 1 provider that matches the type of the first
505- * provider in the list, and the BDF data
506- * is available then a provider is selected based on locality of device
507- * cpuset and process cpuset and tries to ensure that processes are distributed
508- * evenly across NICs. This has two separate cases:
509- *
510- * i. There is one or more provider local to the process:
511- *
512- * (local rank % number of providers of the same type that share the process cpuset)
513- * is used to select one of these providers.
514- *
515- * ii. There is no provider that is local to the process:
516- *
517- * (local rank % number of providers of the same type)
518- * is used to select one of these providers
519- *
520- * 3. If there is more than 1 providers of the same type in the list, and the BDF data
521- * is not available (the ofi version does not support fi_info.nic or the
522- * provider does not support BDF) then (local rank % number of providers of the same type)
523- * is used to select one of these providers
524- *
525- * @param provider_list (IN) struct fi_info* An initially selected
526- * provider NIC. The provider name and
527- * attributes are used to restrict NIC
528- * selection. This provider is returned if the
529- * NIC selection fails.
530- *
531- * @param package_rank (IN) uint32_t The rank of the process. Used to
532- * select one valid NIC if there is a case
533- * where more than one can be selected. This
534- * could occur when more than one provider
535- * shares the same cpuset as the process.
536- * This could either be a package_rank if one is
537- * successfully calculated, or the process id.
538- *
539- * @param provider (OUT) struct fi_info* object with the selected
540- * provider if the selection succeeds
541- * if the selection fails, returns the fi_info
542- * object that was initially provided.
543- *
544- * All errors should be recoverable and will return the initially provided
545- * provider. However, if an error occurs we can no longer guarantee
546- * that the provider returned is local to the process or that the processes will
547- * balance across available NICs.
548- */
549516struct fi_info * opal_mca_common_ofi_select_provider (struct fi_info * provider_list ,
550517 opal_process_info_t * process_info )
551518{
0 commit comments