1919 * Copyright (C) 2018 Mellanox Technologies, Ltd.
2020 * All rights reserved.
2121 * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
22- * Copyright (c) 2019 IBM Corporation. All rights reserved.
22+ * Copyright (c) 2019-2021 IBM Corporation. All rights reserved.
2323 * Copyright (c) 2019-2020 Inria. All rights reserved.
2424 * $COPYRIGHT$
2525 *
@@ -294,6 +294,18 @@ int opal_hwloc_base_get_topology(void)
294294 wildcard_rank .jobid = OPAL_PROC_MY_NAME .jobid ;
295295 wildcard_rank .vpid = OPAL_VPID_WILDCARD ;
296296
297+ // Did the user ask for a topo file at the mca line?
298+ // Check this first, before main methods.
299+ if (NULL != opal_hwloc_base_topo_file ) {
300+ opal_output_verbose (1 , opal_hwloc_base_framework .framework_output ,
301+ "hwloc:base loading topology from file %s" ,
302+ opal_hwloc_base_topo_file );
303+ if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology (opal_hwloc_base_topo_file ))) {
304+ return rc ;
305+ }
306+ goto done ;
307+ }
308+
297309#if HWLOC_API_VERSION >= 0x20000
298310 opal_output_verbose (2 , opal_hwloc_base_framework .framework_output ,
299311 "hwloc:base: looking for topology in shared memory" );
@@ -337,7 +349,7 @@ int opal_hwloc_base_get_topology(void)
337349 opal_output_verbose (2 , opal_hwloc_base_framework .framework_output ,
338350 "hwloc:base: topology in shared memory" );
339351 topo_in_shmem = true;
340- return OPAL_SUCCESS ;
352+ goto done ;
341353 }
342354 }
343355#endif
@@ -394,7 +406,7 @@ int opal_hwloc_base_get_topology(void)
394406 hwloc_topology_destroy (opal_hwloc_topology );
395407 return rc ;
396408 }
397- } else if ( NULL == opal_hwloc_base_topo_file ) {
409+ } else {
398410 opal_output_verbose (1 , opal_hwloc_base_framework .framework_output ,
399411 "hwloc:base discovering topology" );
400412 if (0 != hwloc_topology_init (& opal_hwloc_topology ) ||
@@ -408,15 +420,10 @@ int opal_hwloc_base_get_topology(void)
408420 hwloc_topology_destroy (opal_hwloc_topology );
409421 return rc ;
410422 }
411- } else {
412- opal_output_verbose (1 , opal_hwloc_base_framework .framework_output ,
413- "hwloc:base loading topology from file %s" ,
414- opal_hwloc_base_topo_file );
415- if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology (opal_hwloc_base_topo_file ))) {
416- return rc ;
417- }
418423 }
419424
425+ done :
426+
420427 /* fill opal_cache_line_size global with the smallest L1 cache
421428 line size */
422429 fill_cache_line_size ();
@@ -659,9 +666,11 @@ static hwloc_obj_t df_search(hwloc_topology_t topo,
659666// available = opal_hwloc_base_get_available_cpus(topo, start)
660667// and skipped objs that had hwloc_bitmap_iszero(available)
661668 hwloc_obj_t root ;
662- opal_hwloc_topo_data_t * rdata ;
669+ opal_hwloc_topo_data_t * rdata = NULL ;
663670 root = hwloc_get_root_obj (topo );
664- rdata = (opal_hwloc_topo_data_t * )root -> userdata ;
671+ if (false == topo_in_shmem ) {
672+ rdata = (opal_hwloc_topo_data_t * )root -> userdata ;
673+ }
665674 hwloc_cpuset_t constrained_cpuset ;
666675
667676 constrained_cpuset = hwloc_bitmap_alloc ();
@@ -696,7 +705,7 @@ unsigned int opal_hwloc_base_get_nbobjs_by_type(hwloc_topology_t topo,
696705 unsigned int num_objs ;
697706 hwloc_obj_t obj ;
698707 opal_hwloc_summary_t * sum ;
699- opal_hwloc_topo_data_t * data ;
708+ opal_hwloc_topo_data_t * data = NULL ;
700709 int rc ;
701710
702711 /* bozo check */
@@ -728,10 +737,17 @@ unsigned int opal_hwloc_base_get_nbobjs_by_type(hwloc_topology_t topo,
728737 obj = hwloc_get_root_obj (topo );
729738
730739 /* first see if the topology already has this summary */
731- data = (opal_hwloc_topo_data_t * )obj -> userdata ;
740+ if (false == topo_in_shmem ) {
741+ data = (opal_hwloc_topo_data_t * )obj -> userdata ;
742+ }
732743 if (NULL == data ) {
733744 data = OBJ_NEW (opal_hwloc_topo_data_t );
734- obj -> userdata = (void * )data ;
745+ if (false == topo_in_shmem ) {
746+ // Can't touch userdata if in read-only shmem!
747+ // We have to protect here for the case where obj->userdata
748+ // is in shmem and it is NULL.
749+ obj -> userdata = (void * ) data ;
750+ }
735751 } else {
736752 OPAL_LIST_FOREACH (sum , & data -> summaries , opal_hwloc_summary_t ) {
737753 if (target == sum -> type &&
@@ -1167,8 +1183,6 @@ int opal_hwloc_base_cset2str(char *str, int len,
11671183 char tmp [BUFSIZ ];
11681184 const int stmp = sizeof (tmp ) - 1 ;
11691185 int * * map = NULL ;
1170- hwloc_obj_t root ;
1171- opal_hwloc_topo_data_t * sum ;
11721186
11731187 str [0 ] = tmp [stmp ] = '\0' ;
11741188
@@ -1177,18 +1191,6 @@ int opal_hwloc_base_cset2str(char *str, int len,
11771191 return OPAL_ERR_NOT_BOUND ;
11781192 }
11791193
1180- /* if the cpuset includes all available cpus, then we are unbound */
1181- root = hwloc_get_root_obj (topo );
1182- if (NULL != root -> userdata ) {
1183- sum = (opal_hwloc_topo_data_t * )root -> userdata ;
1184- if (NULL == sum -> available ) {
1185- return OPAL_ERROR ;
1186- }
1187- if (0 != hwloc_bitmap_isincluded (sum -> available , cpuset )) {
1188- return OPAL_ERR_NOT_BOUND ;
1189- }
1190- }
1191-
11921194 if (OPAL_SUCCESS != (ret = build_map (& num_sockets , & num_cores , cpuset , & map , topo ))) {
11931195 return ret ;
11941196 }
@@ -1235,8 +1237,6 @@ int opal_hwloc_base_cset2mapstr(char *str, int len,
12351237 int core_index , pu_index ;
12361238 const int stmp = sizeof (tmp ) - 1 ;
12371239 hwloc_obj_t socket , core , pu ;
1238- hwloc_obj_t root ;
1239- opal_hwloc_topo_data_t * sum ;
12401240
12411241 str [0 ] = tmp [stmp ] = '\0' ;
12421242
@@ -1245,18 +1245,6 @@ int opal_hwloc_base_cset2mapstr(char *str, int len,
12451245 return OPAL_ERR_NOT_BOUND ;
12461246 }
12471247
1248- /* if the cpuset includes all available cpus, then we are unbound */
1249- root = hwloc_get_root_obj (topo );
1250- if (NULL != root -> userdata ) {
1251- sum = (opal_hwloc_topo_data_t * )root -> userdata ;
1252- if (NULL == sum -> available ) {
1253- return OPAL_ERROR ;
1254- }
1255- if (0 != hwloc_bitmap_isincluded (sum -> available , cpuset )) {
1256- return OPAL_ERR_NOT_BOUND ;
1257- }
1258- }
1259-
12601248 /* Iterate over all existing sockets */
12611249 for (socket = hwloc_get_obj_by_type (topo , HWLOC_OBJ_SOCKET , 0 );
12621250 NULL != socket ;
0 commit comments