1111 * Copyright (c) 2004-2005 The Regents of the University of California.
1212 * All rights reserved.
1313 * Copyright (c) 2006-2010 QLogic Corporation. All rights reserved.
14- * Copyright (c) 2012-2015 Los Alamos National Security, LLC.
15- * All rights reserved.
14+ * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights
15+ * reserved.
1616 * Copyright (c) 2013-2015 Intel, Inc. All rights reserved
1717 * Copyright (c) 2017 Research Organization for Information Science
1818 * and Technology (RIST). All rights reserved.
@@ -77,9 +77,129 @@ mca_mtl_psm2_component_t mca_mtl_psm2_component = {
7777 }
7878};
7979
80+ struct ompi_mtl_psm2_shadow_variable {
81+ int variable_type ;
82+ void * storage ;
83+ mca_base_var_storage_t default_value ;
84+ const char * env_name ;
85+ mca_base_var_info_lvl_t info_level ;
86+ const char * mca_name ;
87+ const char * description ;
88+ };
89+
90+ struct ompi_mtl_psm2_shadow_variable ompi_mtl_psm2_shadow_variables [] = {
91+ {MCA_BASE_VAR_TYPE_STRING , & ompi_mtl_psm2 .psm2_devices , {.stringval = "self,shm,hfi" }, "PSM2_DEVICES" , OPAL_INFO_LVL_3 ,
92+ "devices" , "Comma-delimited list of PSM2 devices. Valid values: self, shm, hfi (default: self,shm,hfi)" },
93+ {MCA_BASE_VAR_TYPE_STRING , & ompi_mtl_psm2 .psm2_memory , {.stringval = "normal" }, "PSM2_MEMORY" , OPAL_INFO_LVL_9 ,
94+ "memory_model" , "PSM2 memory usage mode (default: normal)" },
95+ {MCA_BASE_VAR_TYPE_UNSIGNED_LONG , & ompi_mtl_psm2 .psm2_mq_sendreqs_max , {.ulval = 1048576 }, "PSM2_MQ_SENDREQS_MAX" , OPAL_INFO_LVL_3 ,
96+ "mq_sendreqs_max" , "PSM2 maximum number of isend requests in flight (default: 1M)" },
97+ {MCA_BASE_VAR_TYPE_UNSIGNED_LONG , & ompi_mtl_psm2 .psm2_mq_recvreqs_max , {.ulval = 1048576 }, "PSM2_MQ_RECVREQS_MAX" , OPAL_INFO_LVL_3 ,
98+ "mq_recvreqs_max" , "PSM2 maximum number of irecv requests in flight (default: 1M)" },
99+ {MCA_BASE_VAR_TYPE_UNSIGNED_LONG , & ompi_mtl_psm2 .psm2_mq_rndv_hfi_threshold , {.ulval = 64000 }, "PSM2_MQ_RNDV_HFI_THRESH" , OPAL_INFO_LVL_3 ,
100+ "hfi_eager_limit" , "PSM2 eager to rendezvous threshold (default: 64000)" },
101+ {MCA_BASE_VAR_TYPE_UNSIGNED_LONG , & ompi_mtl_psm2 .psm2_mq_rndv_shm_threshold , {.ulval = 16000 }, "PSM2_MQ_RNDV_SHM_THRESH" , OPAL_INFO_LVL_3 ,
102+ "shm_eager_limit" , "PSM2 shared memory eager to rendezvous threshold (default: 16000)" },
103+ {MCA_BASE_VAR_TYPE_BOOL , & ompi_mtl_psm2 .psm2_recvthread , {.boolval = true}, "PSM2_RCVTHREAD" , OPAL_INFO_LVL_3 ,
104+ "use_receive_thread" , "Use PSM2 progress thread (default: true)" },
105+ {MCA_BASE_VAR_TYPE_BOOL , & ompi_mtl_psm2 .psm2_shared_contexts , {.boolval = true}, "PSM2_SHAREDCONTEXTS" , OPAL_INFO_LVL_6 ,
106+ "use_shared_contexts" , "Share PSM contexts between MPI processes (default: true)" },
107+ {MCA_BASE_VAR_TYPE_UNSIGNED_LONG , & ompi_mtl_psm2 .psm2_shared_contexts_max , {.ulval = 8 }, "PSM2_SHAREDCONTEXTS_MAX" , OPAL_INFO_LVL_9 ,
108+ "max_shared_contexts" , "Maximum number of contexts available on a node (default: 8, max: 8)" },
109+ {MCA_BASE_VAR_TYPE_UNSIGNED_LONG , & ompi_mtl_psm2 .psm2_tracemask , {.ulval = 1 }, "PSM2_TRACEMASK" , OPAL_INFO_LVL_9 ,
110+ "trace_mask" , "PSM2 tracemask value. See PSM2 documentation for accepted values (default: 1)" },
111+ {-1 },
112+ };
113+
114+ static void ompi_mtl_psm2_set_shadow_env (struct ompi_mtl_psm2_shadow_variable * variable )
115+ {
116+ mca_base_var_storage_t * storage = variable -> storage ;
117+ char * env_value ;
118+ int ret ;
119+
120+ switch (variable -> variable_type ) {
121+ case MCA_BASE_VAR_TYPE_BOOL :
122+ ret = asprintf (& env_value , "%s=%s" , variable -> env_name , storage -> boolval ? "YES" : "NO" );
123+ break ;
124+ case MCA_BASE_VAR_TYPE_UNSIGNED_LONG :
125+ if (0 == strcmp (variable -> env_name , "PSM2_TRACEMASK" )) {
126+ /* PSM2 documentation shows the tracemask as a hexidecimal number. to be consitent
127+ * use hexidecimal here. */
128+ ret = asprintf (& env_value , "%s=0x%lx" , variable -> env_name , storage -> ulval );
129+ } else {
130+ ret = asprintf (& env_value , "%s=%lu" , variable -> env_name , storage -> ulval );
131+ }
132+ break ;
133+ case MCA_BASE_VAR_TYPE_STRING :
134+ ret = asprintf (& env_value , "%s=%s" , variable -> env_name , storage -> stringval );
135+ break ;
136+ }
137+
138+ if (0 > ret ) {
139+ fprintf (stderr , "ERROR setting PSM2 environment variable: %s\n" , variable -> env_name );
140+ } else {
141+ putenv (env_value );
142+ }
143+ }
144+
145+ static void ompi_mtl_psm2_register_shadow_env (struct ompi_mtl_psm2_shadow_variable * variable )
146+ {
147+ mca_base_var_storage_t * storage = variable -> storage ;
148+ char * env_value ;
149+
150+ env_value = getenv (variable -> env_name );
151+ switch (variable -> variable_type ) {
152+ case MCA_BASE_VAR_TYPE_BOOL :
153+ if (env_value ) {
154+ int tmp ;
155+ (void ) mca_base_var_enum_bool .value_from_string (& mca_base_var_enum_bool , env_value , & tmp );
156+ storage -> boolval = !!tmp ;
157+ } else {
158+ storage -> boolval = variable -> default_value .boolval ;
159+ }
160+ break ;
161+ case MCA_BASE_VAR_TYPE_UNSIGNED_LONG :
162+ if (env_value ) {
163+ storage -> ulval = strtol (env_value , NULL , 0 );
164+ } else {
165+ storage -> ulval = variable -> default_value .ulval ;
166+ }
167+ break ;
168+ case MCA_BASE_VAR_TYPE_STRING :
169+ if (env_value ) {
170+ storage -> stringval = env_value ;
171+ } else {
172+ storage -> stringval = variable -> default_value .stringval ;
173+ }
174+ break ;
175+ }
176+
177+ (void ) mca_base_component_var_register (& mca_mtl_psm2_component .super .mtl_version , variable -> mca_name , variable -> description ,
178+ variable -> variable_type , NULL , 0 , 0 , variable -> info_level , MCA_BASE_VAR_SCOPE_READONLY ,
179+ variable -> storage );
180+ }
181+
182+ static int
183+ get_num_total_procs (int * out_ntp )
184+ {
185+ * out_ntp = (int )ompi_process_info .num_procs ;
186+ return OMPI_SUCCESS ;
187+ }
188+
189+ static int
190+ get_num_local_procs (int * out_nlp )
191+ {
192+ /* num_local_peers does not include us in
193+ * its calculation, so adjust for that */
194+ * out_nlp = (int )(1 + ompi_process_info .num_local_peers );
195+ return OMPI_SUCCESS ;
196+ }
197+
80198static int
81199ompi_mtl_psm2_component_register (void )
82200{
201+ int num_local_procs , num_total_procs ;
202+
83203 ompi_mtl_psm2 .connect_timeout = 180 ;
84204 (void ) mca_base_component_var_register (& mca_mtl_psm2_component .super .mtl_version ,
85205 "connect_timeout" ,
@@ -89,15 +209,32 @@ ompi_mtl_psm2_component_register(void)
89209 MCA_BASE_VAR_SCOPE_READONLY ,
90210 & ompi_mtl_psm2 .connect_timeout );
91211
212+
213+ (void ) get_num_local_procs (& num_local_procs );
214+ (void ) get_num_total_procs (& num_total_procs );
215+
92216 /* set priority high enough to beat ob1's default (also set higher than psm) */
93- param_priority = 40 ;
217+ if (num_local_procs == num_total_procs ) {
218+ /* disable hfi if all processes are local */
219+ setenv ("PSM2_DEVICES" , "self,shm" , 0 );
220+ /* ob1 is much faster than psm2 with shared memory */
221+ param_priority = 10 ;
222+ } else {
223+ param_priority = 40 ;
224+ }
225+
94226 (void ) mca_base_component_var_register (& mca_mtl_psm2_component .super .mtl_version ,
95227 "priority" , "Priority of the PSM2 MTL component" ,
96228 MCA_BASE_VAR_TYPE_INT , NULL , 0 , 0 ,
97229 OPAL_INFO_LVL_9 ,
98230 MCA_BASE_VAR_SCOPE_READONLY ,
99231 & param_priority );
100232
233+
234+ for (int i = 0 ; ompi_mtl_psm2_shadow_variables [i ].variable_type >= 0 ; ++ i ) {
235+ ompi_mtl_psm2_register_shadow_env (ompi_mtl_psm2_shadow_variables + i );
236+ }
237+
101238 return OMPI_SUCCESS ;
102239}
103240
@@ -172,22 +309,6 @@ ompi_mtl_psm2_component_close(void)
172309 return OMPI_SUCCESS ;
173310}
174311
175- static int
176- get_num_total_procs (int * out_ntp )
177- {
178- * out_ntp = (int )ompi_process_info .num_procs ;
179- return OMPI_SUCCESS ;
180- }
181-
182- static int
183- get_num_local_procs (int * out_nlp )
184- {
185- /* num_local_peers does not include us in
186- * its calculation, so adjust for that */
187- * out_nlp = (int )(1 + ompi_process_info .num_local_peers );
188- return OMPI_SUCCESS ;
189- }
190-
191312static int
192313get_local_rank (int * out_rank )
193314{
@@ -211,7 +332,6 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads,
211332 int verno_major = PSM2_VERNO_MAJOR ;
212333 int verno_minor = PSM2_VERNO_MINOR ;
213334 int local_rank = -1 , num_local_procs = 0 ;
214- int num_total_procs = 0 ;
215335
216336 /* Compute the total number of processes on this host and our local rank
217337 * on that node. We need to provide PSM2 with these values so it can
@@ -226,11 +346,6 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads,
226346 opal_output (0 , "Cannot determine local rank. Cannot continue.\n" );
227347 return NULL ;
228348 }
229- if (OMPI_SUCCESS != get_num_total_procs (& num_total_procs )) {
230- opal_output (0 , "Cannot determine total number of processes. "
231- "Cannot continue.\n" );
232- return NULL ;
233- }
234349
235350 err = psm2_error_register_handler (NULL /* no ep */ ,
236351 PSM2_ERRHANDLER_NOP );
@@ -240,8 +355,8 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads,
240355 return NULL ;
241356 }
242357
243- if ( num_local_procs == num_total_procs ) {
244- setenv ( "PSM2_DEVICES" , "self,shm" , 0 );
358+ for ( int i = 0 ; ompi_mtl_psm2_shadow_variables [ i ]. variable_type >= 0 ; ++ i ) {
359+ ompi_mtl_psm2_set_shadow_env ( ompi_mtl_psm2_shadow_variables + i );
245360 }
246361
247362 err = psm2_init (& verno_major , & verno_minor );
0 commit comments