1717#include "opal/mca/base/mca_base_framework.h"
1818#include "opal/mca/pmix/pmix-internal.h"
1919#include "opal/memoryhooks/memory.h"
20+ #include "opal/util/argv.h"
2021
2122#include <ucm/api/ucm.h>
23+ #include <fnmatch.h>
24+ #include <stdio.h>
2225
2326/***********************************************************************/
2427
@@ -28,7 +31,8 @@ opal_common_ucx_module_t opal_common_ucx = {
2831 .verbose = 0 ,
2932 .progress_iterations = 100 ,
3033 .registered = 0 ,
31- .opal_mem_hooks = 0
34+ .opal_mem_hooks = 0 ,
35+ .tls = NULL
3236};
3337
3438static void opal_common_ucx_mem_release_cb (void * buf , size_t length ,
@@ -39,10 +43,15 @@ static void opal_common_ucx_mem_release_cb(void *buf, size_t length,
3943
4044OPAL_DECLSPEC void opal_common_ucx_mca_var_register (const mca_base_component_t * component )
4145{
46+ static const char * default_tls = "rc_verbs,ud_verbs,rc_mlx5,dc_mlx5,cuda_ipc,rocm_ipc" ;
47+ static const char * default_devices = "mlx*" ;
4248 static int registered = 0 ;
4349 static int hook_index ;
4450 static int verbose_index ;
4551 static int progress_index ;
52+ static int tls_index ;
53+ static int devices_index ;
54+
4655 if (!registered ) {
4756 verbose_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "verbose" ,
4857 "Verbose level of the UCX components" ,
@@ -63,6 +72,29 @@ OPAL_DECLSPEC void opal_common_ucx_mca_var_register(const mca_base_component_t *
6372 OPAL_INFO_LVL_3 ,
6473 MCA_BASE_VAR_SCOPE_LOCAL ,
6574 & opal_common_ucx .opal_mem_hooks );
75+
76+ opal_common_ucx .tls = malloc (sizeof (* opal_common_ucx .tls ));
77+ * opal_common_ucx .tls = strdup (default_tls );
78+ tls_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "tls" ,
79+ "List of UCX transports which should be supported on the system, to enable "
80+ "selecting the UCX component. Special values: any (any available). "
81+ "A '^' prefix negates the list. "
82+ "For example, in order to exclude on shared memory and TCP transports, "
83+ "please set to '^posix,sysv,self,tcp,cma,knem,xpmem'." ,
84+ MCA_BASE_VAR_TYPE_STRING , NULL , 0 , 0 ,
85+ OPAL_INFO_LVL_3 ,
86+ MCA_BASE_VAR_SCOPE_LOCAL ,
87+ opal_common_ucx .tls );
88+
89+ opal_common_ucx .devices = malloc (sizeof (* opal_common_ucx .devices ));
90+ * opal_common_ucx .devices = strdup (default_devices );
91+ devices_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "devices" ,
92+ "List of device driver pattern names, which, if supported by UCX, will "
93+ "bump its priority above ob1. Special values: any (any available)" ,
94+ MCA_BASE_VAR_TYPE_STRING , NULL , 0 , 0 ,
95+ OPAL_INFO_LVL_3 ,
96+ MCA_BASE_VAR_SCOPE_LOCAL ,
97+ opal_common_ucx .devices );
6698 registered = 1 ;
6799 }
68100 if (component ) {
@@ -78,6 +110,14 @@ OPAL_DECLSPEC void opal_common_ucx_mca_var_register(const mca_base_component_t *
78110 component -> mca_type_name ,
79111 component -> mca_component_name ,
80112 "opal_mem_hooks" , 0 );
113+ mca_base_var_register_synonym (tls_index , component -> mca_project_name ,
114+ component -> mca_type_name ,
115+ component -> mca_component_name ,
116+ "tls" , 0 );
117+ mca_base_var_register_synonym (devices_index , component -> mca_project_name ,
118+ component -> mca_type_name ,
119+ component -> mca_component_name ,
120+ "devices" , 0 );
81121 }
82122}
83123
@@ -126,6 +166,166 @@ OPAL_DECLSPEC void opal_common_ucx_mca_deregister(void)
126166 opal_output_close (opal_common_ucx .output );
127167}
128168
169+ #if HAVE_DECL_OPEN_MEMSTREAM
170+ static bool opal_common_ucx_check_device (const char * device_name , char * * device_list )
171+ {
172+ char sysfs_driver_link [PATH_MAX ];
173+ char driver_path [PATH_MAX ];
174+ char * ib_device_name ;
175+ char * driver_name ;
176+ char * * list_item ;
177+ ssize_t ret ;
178+
179+ /* mlx5_0:1 */
180+ ret = sscanf (device_name , "%m[^:]%*d" , & ib_device_name );
181+ if (ret != 1 ) {
182+ return false;
183+ }
184+
185+ sysfs_driver_link [sizeof (sysfs_driver_link ) - 1 ] = '\0' ;
186+ snprintf (sysfs_driver_link , sizeof (sysfs_driver_link ) - 1 ,
187+ "/sys/class/infiniband/%s/device/driver" , ib_device_name );
188+ free (ib_device_name );
189+
190+ driver_path [sizeof (driver_path ) - 1 ] = '\0' ;
191+ ret = readlink (sysfs_driver_link , driver_path , sizeof (driver_path ) - 1 );
192+ if (ret < 0 ) {
193+ MCA_COMMON_UCX_VERBOSE (2 , "readlink(%s) failed: %s" , sysfs_driver_link ,
194+ strerror (errno ));
195+ return false;
196+ }
197+
198+ driver_name = basename (driver_path );
199+ for (list_item = device_list ; * list_item != NULL ; ++ list_item ) {
200+ if (!fnmatch (* list_item , driver_name , 0 )) {
201+ MCA_COMMON_UCX_VERBOSE (2 , "driver '%s' matched by '%s'" ,
202+ driver_path , * list_item );
203+ return true;
204+ }
205+ }
206+
207+ return false;
208+ }
209+ #endif
210+
211+ OPAL_DECLSPEC opal_common_ucx_support_level_t
212+ opal_common_ucx_support_level (ucp_context_h context )
213+ {
214+ opal_common_ucx_support_level_t support_level = OPAL_COMMON_UCX_SUPPORT_NONE ;
215+ static const char * support_level_names [] = {
216+ [OPAL_COMMON_UCX_SUPPORT_NONE ] = "none" ,
217+ [OPAL_COMMON_UCX_SUPPORT_TRANSPORT ] = "transports only" ,
218+ [OPAL_COMMON_UCX_SUPPORT_DEVICE ] = "transports and devices"
219+ };
220+ #if HAVE_DECL_OPEN_MEMSTREAM
221+ char * rsc_tl_name , * rsc_device_name ;
222+ char * * tl_list , * * device_list , * * list_item ;
223+ bool is_any_tl , is_any_device ;
224+ bool found_tl , negate ;
225+ char line [128 ];
226+ FILE * stream ;
227+ char * buffer ;
228+ size_t size ;
229+ int ret ;
230+ #endif
231+
232+ is_any_tl = !strcmp (* opal_common_ucx .tls , "any" );
233+ is_any_device = !strcmp (* opal_common_ucx .devices , "any" );
234+
235+ /* Check for special value "any" */
236+ if (is_any_tl && is_any_device ) {
237+ MCA_COMMON_UCX_VERBOSE (1 , "ucx is enabled on any transport or device" ,
238+ * opal_common_ucx .tls );
239+ support_level = OPAL_COMMON_UCX_SUPPORT_DEVICE ;
240+ goto out ;
241+ }
242+
243+ #if HAVE_DECL_OPEN_MEMSTREAM
244+ /* Split transports list */
245+ negate = ('^' == (* opal_common_ucx .tls )[0 ]);
246+ tl_list = opal_argv_split (* opal_common_ucx .tls + (negate ? 1 : 0 ), ',' );
247+ if (tl_list == NULL ) {
248+ MCA_COMMON_UCX_VERBOSE (1 , "failed to split tl list '%s', ucx is disabled" ,
249+ * opal_common_ucx .tls );
250+ goto out ;
251+ }
252+
253+ /* Split devices list */
254+ device_list = opal_argv_split (* opal_common_ucx .devices , ',' );
255+ if (device_list == NULL ) {
256+ MCA_COMMON_UCX_VERBOSE (1 , "failed to split devices list '%s', ucx is disabled" ,
257+ * opal_common_ucx .devices );
258+ goto out_free_tl_list ;
259+ }
260+
261+ /* Open memory stream to dump UCX information to */
262+ stream = open_memstream (& buffer , & size );
263+ if (stream == NULL ) {
264+ MCA_COMMON_UCX_VERBOSE (1 , "failed to open memory stream for ucx info (%s), "
265+ "ucx is disabled" , strerror (errno ));
266+ goto out_free_device_list ;
267+ }
268+
269+ /* Print ucx transports information to the memory stream */
270+ ucp_context_print_info (context , stream );
271+
272+ /* Rewind and read transports/devices list from the stream */
273+ fseek (stream , 0 , SEEK_SET );
274+ while ((support_level != OPAL_COMMON_UCX_SUPPORT_DEVICE ) &&
275+ (fgets (line , sizeof (line ), stream ) != NULL )) {
276+ rsc_tl_name = NULL ;
277+ ret = sscanf (line ,
278+ /* "# resource 6 : md 5 dev 4 flags -- rc_verbs/mlx5_0:1" */
279+ "# resource %*d : md %*d dev %*d flags -- %m[^/ \n\r]/%m[^/ \n\r]" ,
280+ & rsc_tl_name , & rsc_device_name );
281+ if (ret != 2 ) {
282+ free (rsc_tl_name );
283+ continue ;
284+ }
285+
286+ /* Check if 'rsc_tl_name' is found provided list */
287+ found_tl = is_any_tl ;
288+ for (list_item = tl_list ; !found_tl && (* list_item != NULL ); ++ list_item ) {
289+ found_tl = !strcmp (* list_item , rsc_tl_name );
290+ }
291+
292+ /* Check if the transport has a match (either positive or negative) */
293+ assert (!(is_any_tl && negate ));
294+ if (found_tl != negate ) {
295+ if (is_any_device ||
296+ opal_common_ucx_check_device (rsc_device_name , device_list )) {
297+ MCA_COMMON_UCX_VERBOSE (2 , "%s/%s: matched both transport and device list" ,
298+ rsc_tl_name , rsc_device_name );
299+ support_level = OPAL_COMMON_UCX_SUPPORT_DEVICE ;
300+ } else {
301+ MCA_COMMON_UCX_VERBOSE (2 , "%s/%s: matched transport list but not device list" ,
302+ rsc_tl_name , rsc_device_name );
303+ support_level = OPAL_COMMON_UCX_SUPPORT_TRANSPORT ;
304+ }
305+ } else {
306+ MCA_COMMON_UCX_VERBOSE (2 , "%s/%s: did not match transport list" ,
307+ rsc_tl_name , rsc_device_name );
308+ }
309+
310+ free (rsc_device_name );
311+ free (rsc_tl_name );
312+ }
313+
314+ MCA_COMMON_UCX_VERBOSE (2 , "support level is %s" , support_level_names [support_level ]);
315+ fclose (stream );
316+ free (buffer );
317+
318+ out_free_device_list :
319+ opal_argv_free (device_list );
320+ out_free_tl_list :
321+ opal_argv_free (tl_list );
322+ out :
323+ #else
324+ MCA_COMMON_UCX_VERBOSE (2 , "open_memstream() was not found, ucx is disabled" );
325+ #endif
326+ return support_level ;
327+ }
328+
129329void opal_common_ucx_empty_complete_cb (void * request , ucs_status_t status )
130330{
131331}
0 commit comments