@@ -491,7 +491,7 @@ ompi_mtl_ofi_define_tag_mode(int ofi_tag_mode, int *bits_for_cid) {
491491 } \
492492 } while(0);
493493
494- static int ompi_mtl_ofi_init_sep (struct fi_info * prov )
494+ static int ompi_mtl_ofi_init_sep (struct fi_info * prov , int universe_size )
495495{
496496 int ret = OMPI_SUCCESS , num_ofi_ctxts ;
497497 struct fi_av_attr av_attr = {0 };
@@ -513,7 +513,7 @@ static int ompi_mtl_ofi_init_sep(struct fi_info *prov)
513513
514514 av_attr .type = (MTL_OFI_AV_TABLE == av_type ) ? FI_AV_TABLE : FI_AV_MAP ;
515515 av_attr .rx_ctx_bits = ompi_mtl_ofi .rx_ctx_bits ;
516- av_attr .count = ompi_mtl_ofi .num_ofi_contexts ;
516+ av_attr .count = ompi_mtl_ofi .num_ofi_contexts * universe_size ;
517517 ret = fi_av_open (ompi_mtl_ofi .domain , & av_attr , & ompi_mtl_ofi .av , NULL );
518518
519519 if (0 != ret ) {
@@ -546,7 +546,7 @@ static int ompi_mtl_ofi_init_sep(struct fi_info *prov)
546546 return ret ;
547547}
548548
549- static int ompi_mtl_ofi_init_regular_ep (struct fi_info * prov )
549+ static int ompi_mtl_ofi_init_regular_ep (struct fi_info * prov , int universe_size )
550550{
551551 int ret = OMPI_SUCCESS ;
552552 struct fi_av_attr av_attr = {0 };
@@ -574,6 +574,7 @@ static int ompi_mtl_ofi_init_regular_ep(struct fi_info * prov)
574574 * - address vector and completion queues
575575 */
576576 av_attr .type = (MTL_OFI_AV_TABLE == av_type ) ? FI_AV_TABLE : FI_AV_MAP ;
577+ av_attr .count = universe_size ;
577578 ret = fi_av_open (ompi_mtl_ofi .domain , & av_attr , & ompi_mtl_ofi .av , NULL );
578579 if (ret ) {
579580 MTL_OFI_LOG_FI_ERR (ret , "fi_av_open failed" );
@@ -626,6 +627,8 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
626627 struct fi_info * prov_cq_data = NULL ;
627628 char ep_name [FI_NAME_MAX ] = {0 };
628629 size_t namelen ;
630+ int universe_size ;
631+ char * univ_size_str ;
629632
630633 /**
631634 * Hints to filter providers
@@ -897,21 +900,35 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
897900 * vectors, completion counters or event queues etc, and enabled.
898901 * See man fi_endpoint for more details.
899902 */
900- max_ofi_ctxts = (prov -> domain_attr -> max_ep_tx_ctx <
901- prov -> domain_attr -> max_ep_rx_ctx ) ?
902- prov -> domain_attr -> max_ep_tx_ctx :
903- prov -> domain_attr -> max_ep_rx_ctx ;
904-
905- num_local_ranks = 1 + ompi_process_info .num_local_peers ;
906- if ((max_ofi_ctxts <= num_local_ranks ) &&
907- (1 == ompi_mtl_ofi .enable_sep )) {
908- opal_show_help ("help-mtl-ofi.txt" , "Local ranks exceed ofi contexts" ,
909- true, prov -> fabric_attr -> prov_name ,
910- ompi_process_info .nodename , __FILE__ , __LINE__ );
911- goto error ;
903+
904+ /* Use the universe size as a rough estimate for the address vector
905+ * size hint passed to fi_av_open(). For regular endpoints, the
906+ * count will be the universe size. For scalable endpoints, the
907+ * count will be the universe size multiplied by the number of
908+ * contexts. In either case the count is only a hint, not a hard
909+ * limit: if the universe grows (via dynamic processes), libfabric
910+ * will just be slightly less efficient.
911+ */
912+ univ_size_str = getenv ("OMPI_UNIVERSE_SIZE" );
913+ if (NULL == univ_size_str ||
914+ (universe_size = strtol (univ_size_str , NULL , 0 )) <= 0 ) {
915+ universe_size = ompi_proc_world_size ();
912916 }
913917
914918 if (1 == ompi_mtl_ofi .enable_sep ) {
919+ max_ofi_ctxts = (prov -> domain_attr -> max_ep_tx_ctx <
920+ prov -> domain_attr -> max_ep_rx_ctx ) ?
921+ prov -> domain_attr -> max_ep_tx_ctx :
922+ prov -> domain_attr -> max_ep_rx_ctx ;
923+
924+ num_local_ranks = 1 + ompi_process_info .num_local_peers ;
925+ if (max_ofi_ctxts <= num_local_ranks ) {
926+ opal_show_help ("help-mtl-ofi.txt" , "Local ranks exceed ofi contexts" ,
927+ true, prov -> fabric_attr -> prov_name ,
928+ ompi_process_info .nodename , __FILE__ , __LINE__ );
929+ goto error ;
930+ }
931+
915932 /* Provision enough contexts to service all ranks in a node */
916933 max_ofi_ctxts /= num_local_ranks ;
917934
@@ -926,9 +943,9 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
926943 ompi_mtl_ofi .num_ofi_contexts = max_ofi_ctxts ;
927944 }
928945
929- ret = ompi_mtl_ofi_init_sep (prov );
946+ ret = ompi_mtl_ofi_init_sep (prov , universe_size );
930947 } else {
931- ret = ompi_mtl_ofi_init_regular_ep (prov );
948+ ret = ompi_mtl_ofi_init_regular_ep (prov , universe_size );
932949 }
933950
934951 if (OMPI_SUCCESS != ret ) {
0 commit comments