55 * Copyright (c) 2020-2022 Triad National Security, LLC. All rights
66 * reserved.
77 * Copyright (c) 2020-2021 Cisco Systems, Inc. All rights reserved.
8- * Copyright (c) 2021 Nanook Consulting. All rights reserved.
8+ * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved.
99 * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All rights
1010 * reserved.
1111 * Copyright (c) 2023 UT-Battelle, LLC. All rights reserved.
@@ -469,35 +469,42 @@ static int check_provider_attr(struct fi_info *provider_info, struct fi_info *pr
469469static int compute_dev_distances (pmix_device_distance_t * * distances ,
470470 size_t * ndist )
471471{
472- int ret = 0 ;
472+ int ret = OPAL_SUCCESS ;
473473 size_t ninfo ;
474474 pmix_info_t * info ;
475475 pmix_cpuset_t cpuset ;
476- pmix_topology_t * pmix_topo ;
476+ pmix_topology_t pmix_topo = PMIX_TOPOLOGY_STATIC_INIT ;
477477 pmix_device_type_t type = PMIX_DEVTYPE_OPENFABRICS |
478478 PMIX_DEVTYPE_NETWORK ;
479479
480480 PMIX_CPUSET_CONSTRUCT (& cpuset );
481481 ret = PMIx_Get_cpuset (& cpuset , PMIX_CPUBIND_THREAD );
482482 if (PMIX_SUCCESS != ret ) {
483+ /* we are not bound */
484+ ret = OPAL_ERR_NOT_BOUND ;
483485 goto out ;
484486 }
487+ /* if we are not bound, then we cannot compute distances */
488+ if (hwloc_bitmap_iszero (cpuset .bitmap ) ||
489+ hwloc_bitmap_isfull (cpuset .bitmap )) {
490+ return OPAL_ERR_NOT_BOUND ;
491+ }
485492
486- /* load the PMIX topology */
487- PMIx_Topology_free (pmix_topo , 1 );
488- ret = PMIx_Load_topology (pmix_topo );
493+ /* load the PMIx topology - this only returns a pointer to
494+ * the local topology held inside PMIx, so the caller must
495+ * not free it */
496+ ret = PMIx_Load_topology (& pmix_topo );
489497 if (PMIX_SUCCESS != ret ) {
490498 goto out ;
491499 }
492500
493501 ninfo = 1 ;
494502 info = PMIx_Info_create (ninfo );
495503 PMIx_Info_load (& info [0 ], PMIX_DEVICE_TYPE , & type , PMIX_DEVTYPE );
496- ret = PMIx_Compute_distances (pmix_topo , & cpuset , info , ninfo , distances ,
504+ ret = PMIx_Compute_distances (& pmix_topo , & cpuset , info , ninfo , distances ,
497505 ndist );
498506 PMIx_Info_free (info , ninfo );
499507
500- PMIx_Topology_free (pmix_topo , 1 );
501508out :
502509 return ret ;
503510}
@@ -533,8 +540,9 @@ get_nearest_nics(int *num_distances, pmix_value_t **valin)
533540 PMIx_Info_destruct (& directive );
534541 if (ret != PMIX_SUCCESS || !val ) {
535542 ret = compute_dev_distances (& distances , & ndist );
536- if (ret )
543+ if (ret ) {
537544 goto out ;
545+ }
538546 goto find_nearest ;
539547 }
540548
@@ -554,8 +562,9 @@ get_nearest_nics(int *num_distances, pmix_value_t **valin)
554562
555563find_nearest :
556564 nearest = calloc (sizeof (* distances ), ndist );
557- if (!nearest )
565+ if (!nearest ) {
558566 goto out ;
567+ }
559568
560569 for (i = 0 ; i < ndist ; i ++ ) {
561570 if (distances [i ].type != PMIX_DEVTYPE_NETWORK &&
@@ -596,6 +605,15 @@ get_nearest_nics(int *num_distances, pmix_value_t **valin)
596605 * distances array is not provided. False otherwise.
597606 *
598607 */
608+ #if HWLOC_API_VERSION < 0x00020000
609+ static bool is_near (pmix_device_distance_t * distances ,
610+ int num_distances ,
611+ hwloc_topology_t topology ,
612+ struct fi_pci_attr pci )
613+ {
614+ return true;
615+ }
616+ #else
599617static bool is_near (pmix_device_distance_t * distances ,
600618 int num_distances ,
601619 hwloc_topology_t topology ,
@@ -658,6 +676,7 @@ static bool is_near(pmix_device_distance_t *distances,
658676 return false;
659677}
660678#endif
679+ #endif // OPAL_OFI_PCI_DATA_AVAILABLE
661680
662681/* Count providers returns the number of providers present in an fi_info list
663682 * @param (IN) provider_list struct fi_info* list of providers available
@@ -772,8 +791,8 @@ struct fi_info *opal_common_ofi_select_provider(struct fi_info *provider_list,
772791 pmix_value_t * pmix_val ;
773792 struct fi_pci_attr pci ;
774793 int num_distances = 0 ;
775- bool near ;
776794#endif
795+ bool near ;
777796 int ret ;
778797 unsigned int num_provider = 0 , provider_limit = 0 ;
779798 bool provider_found = false;
0 commit comments