@@ -41,12 +41,21 @@ opal_common_ofi_module_t opal_common_ofi = {.prov_include = NULL,
4141static const char default_prov_exclude_list [] = "shm,sockets,tcp,udp,rstream,usnic" ;
4242static opal_mutex_t opal_common_ofi_mutex = OPAL_MUTEX_STATIC_INIT ;
4343static int opal_common_ofi_init_ref_cnt = 0 ;
44+ static bool opal_common_ofi_installed_memory_monitor = false;
4445
4546#ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR
4647
4748/*
48- * These no-op functions are necessary since libfabric does not allow null
49- * function pointers here.
49+ * Monitor object to export into Libfabric to provide memory release
50+ * notifications using our own memory hooks framework. Monitors may
51+ * use the subscribe/unsubscribe notifications to reduce unnecessary
52+ * notifications, but are not required to do so. Because patcher
53+ * notifies about all releases, it is cheaper for us to not filter and
54+ * this monitor can safely ignore subscribe/unsubscribe notifications.
55+ *
56+ * Libfabric requires the object to be fully defined. Unlike most of
57+ * Open MPI, it does not have NULL function pointer checks in calling
58+ * code.
5059 */
5160static int opal_common_ofi_monitor_start (struct fid_mem_monitor * monitor )
5261{
@@ -76,8 +85,8 @@ static bool opal_common_ofi_monitor_valid(struct fid_mem_monitor *monitor,
7685 return true;
7786}
7887
/*
 * Monitor object handed to Libfabric, and the fid of the "mr_cache"
 * object it is imported into.  Both are NULL until a monitor has been
 * set up.
 */
static struct fid_mem_monitor *opal_common_ofi_monitor = NULL;
static struct fid *opal_common_ofi_cache_fid = NULL;
8190static struct fi_ops_mem_monitor opal_common_ofi_export_ops = {
8291 .size = sizeof (struct fi_ops_mem_monitor ),
8392 .start = opal_common_ofi_monitor_start ,
@@ -87,6 +96,12 @@ static struct fi_ops_mem_monitor opal_common_ofi_export_ops = {
8796 .valid = opal_common_ofi_monitor_valid ,
8897};
8998
99+ /**
100+ * Callback function from Open MPI memory monitor
101+ *
102+ * Translation function between the callback function from Open MPI's
103+ * memory notifier to the Libfabric memory monitor.
104+ */
90105static void opal_common_ofi_mem_release_cb (void * buf , size_t length ,
91106 void * cbdata , bool from_alloc )
92107{
@@ -96,68 +111,110 @@ static void opal_common_ofi_mem_release_cb(void *buf, size_t length,
96111
97112#endif /* HAVE_STRUCT_FI_OPS_MEM_MONITOR */
98113
99- int opal_common_ofi_open (void )
114+ int opal_common_ofi_export_memory_monitor (void )
100115{
101- int ret ;
116+ int ret = - FI_ENOSYS ;
102117
103- if ((opal_common_ofi_init_ref_cnt ++ ) > 0 ) {
104- return OPAL_SUCCESS ;
105- }
106118#ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR
119+ OPAL_THREAD_LOCK (& opal_common_ofi_mutex );
120+
121+ if (NULL != opal_common_ofi_cache_fid ) {
122+ return 0 ;
123+ }
124+
125+ /*
126+ * While the memory import functionality was introduced in 1.13,
127+ * some deadlock bugs exist in the 1.13 series. Require version
128+ * 1.14 before this code is activated. Not activating the code
129+ * should not break any functionality directly, but may lead to
130+ * sub-optimal memory monitors being used in Libfabric, as Open
131+ * MPI will almost certainly install a patcher first.
132+ */
133+ if (FI_VERSION_LT (fi_version (), FI_VERSION (1 , 14 ))) {
134+ ret = - FI_ENOSYS ;
135+ goto err ;
136+ }
107137
108- mca_base_framework_open (& opal_memory_base_framework , 0 );
138+ ret = mca_base_framework_open (& opal_memory_base_framework , 0 );
139+ if (OPAL_SUCCESS != ret ) {
140+ ret = - FI_ENOSYS ;
141+ goto err ;
142+ }
109143 if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT )
110144 != (((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT ))
111145 & opal_mem_hooks_support_level ())) {
112- return OPAL_SUCCESS ;
146+ ret = - FI_ENOSYS ;
147+ goto err ;
113148 }
114149
115150 /*
116- * This cache object doesn't do much, but is necessary for the API to work.
117- * It is required to call the fi_import_fid API. This API was introduced in
118- * libfabric version 1.13.0 and "mr_cache" is a "well known" name (documented
119- * in libfabric) to indicate the type of object that we are trying to open.
151+ * The monitor import object has the well known name "mr_cache"
152+ * and was introduced in Libfabric 1.13
120153 */
121- ret = fi_open (FI_VERSION (1 ,13 ), "mr_cache" , NULL , 0 , 0 , & opal_common_ofi_cache_fid , NULL );
122- if (ret ) {
154+ ret = fi_open (FI_VERSION (1 ,13 ), "mr_cache" , NULL , 0 , 0 ,
155+ & opal_common_ofi_cache_fid , NULL );
156+ if (0 != ret ) {
123157 goto err ;
124158 }
125159
126160 opal_common_ofi_monitor = calloc (1 , sizeof (* opal_common_ofi_monitor ));
127- if (!opal_common_ofi_monitor ) {
161+ if (NULL == opal_common_ofi_monitor ) {
162+ ret = - FI_ENOMEM ;
128163 goto err ;
129164 }
130165
131166 opal_common_ofi_monitor -> fid .fclass = FI_CLASS_MEM_MONITOR ;
132167 opal_common_ofi_monitor -> export_ops = & opal_common_ofi_export_ops ;
133- /*
134- * This import_fid call must occur before the libfabric provider creates
135- * its memory registration cache. This will typically occur during domain
136- * open as it is a domain level object. We put it early in initialization
137- * to guarantee this and share the import monitor between the ofi btl
138- * and ofi mtl.
139- */
140- ret = fi_import_fid (opal_common_ofi_cache_fid , & opal_common_ofi_monitor -> fid , 0 );
141- if (ret ) {
168+ ret = fi_import_fid (opal_common_ofi_cache_fid ,
169+ & opal_common_ofi_monitor -> fid , 0 );
170+ if (0 != ret ) {
142171 goto err ;
143172 }
144173 opal_mem_hooks_register_release (opal_common_ofi_mem_release_cb , NULL );
174+ opal_common_ofi_installed_memory_monitor = true;
175+
176+ ret = 0 ;
145177
146- return OPAL_SUCCESS ;
147178err :
148- if (opal_common_ofi_cache_fid ) {
149- fi_close (opal_common_ofi_cache_fid );
179+ if (0 != ret ) {
180+ if (NULL != opal_common_ofi_cache_fid ) {
181+ fi_close (opal_common_ofi_cache_fid );
182+ }
183+ if (NULL != opal_common_ofi_monitor ) {
184+ free (opal_common_ofi_monitor );
185+ }
150186 }
151- if (opal_common_ofi_monitor ) {
187+
188+ opal_common_ofi_installed_memory_monitor = false;
189+
190+ OPAL_THREAD_UNLOCK (& opal_common_ofi_mutex );
191+ #endif
192+
193+ return ret ;
194+ }
195+
196+ static int opal_common_ofi_remove_memory_monitor (void )
197+ {
198+ #ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR
199+ if (opal_common_ofi_installed_memory_monitor ) {
200+ opal_mem_hooks_unregister_release (opal_common_ofi_mem_release_cb );
201+ fi_close (opal_common_ofi_cache_fid );
202+ fi_close (& opal_common_ofi_monitor -> fid );
152203 free (opal_common_ofi_monitor );
204+ opal_common_ofi_installed_memory_monitor = false;
153205 }
206+ #endif
154207
155- opal_common_ofi_init_ref_cnt -- ;
208+ return OPAL_SUCCESS ;
209+ }
210+
211+ int opal_common_ofi_open (void )
212+ {
213+ if ((opal_common_ofi_init_ref_cnt ++ ) > 0 ) {
214+ return OPAL_SUCCESS ;
215+ }
156216
157- return OPAL_ERROR ;
158- #else
159217 return OPAL_SUCCESS ;
160- #endif
161218}
162219
163220int opal_common_ofi_close (void )
@@ -168,14 +225,12 @@ int opal_common_ofi_close(void)
168225 return OPAL_SUCCESS ;
169226 }
170227
171- #ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR
172- opal_mem_hooks_unregister_release (opal_common_ofi_mem_release_cb );
173- fi_close (opal_common_ofi_cache_fid );
174- fi_close (& opal_common_ofi_monitor -> fid );
175- free (opal_common_ofi_monitor );
176- #endif
228+ ret = opal_common_ofi_remove_memory_monitor ();
229+ if (OPAL_SUCCESS != ret ) {
230+ return ret ;
231+ }
177232
178- if (opal_common_ofi . output != -1 ) {
233+ if (-1 != opal_common_ofi . output ) {
179234 opal_output_close (opal_common_ofi .output );
180235 opal_common_ofi .output = -1 ;
181236 if (OPAL_SUCCESS != ret ) {
0 commit comments