@@ -110,30 +110,43 @@ mca_coll_xhc_component_t mca_coll_xhc_component = {
110110 .uniform_chunks = true,
111111 .uniform_chunks_min = 4096 ,
112112
113- .op_mca [XHC_BCAST ] = {
113+ .op_mca = {{0 }},
114+ .op_mca_global = {0 }
115+ };
116+
117+ /* Rather than having the defaults directly inside the component, we keep
118+ * them in a separate structure and copy them over (in xhc_register()). The
119+ * structs in the component are used as storage for the MCA variables, and
120+ * the MCA system will nullify the storage of string variables when it is
121+ * teared down during Finalize. This is a problem if we have multiple MPI
122+ * Sessions, as we'll have lost our defaults the next time we attempt to
123+ * initialize our MCA variables at the second Init. */
124+ static xhc_op_mca_t op_mca_default [XHC_COLLCOUNT ] = {
125+ [XHC_BCAST ] = {
114126 .hierarchy = "numa,socket" ,
115127 .chunk_size = "16K" ,
116128 .cico_max = 256
117129 },
118130
119- . op_mca [XHC_BARRIER ] = {
131+ [XHC_BARRIER ] = {
120132 .hierarchy = "numa,socket" ,
121133 .chunk_size = "1" ,
122134 .cico_max = 0
123135 },
124136
125- . op_mca [XHC_REDUCE ] = {
137+ [XHC_REDUCE ] = {
126138 .hierarchy = "l3,numa,socket" ,
127139 .chunk_size = "16K" ,
128140 .cico_max = 4096
129141 },
130142
131- . op_mca [XHC_ALLREDUCE ] = {
143+ [XHC_ALLREDUCE ] = {
132144 .hierarchy = "l3,numa,socket" ,
133145 .chunk_size = "16K" ,
134146 .cico_max = 4096
135147 }
136148};
149+ static xhc_op_mca_t op_mca_global_default = {0 };
137150
138151// -----------------------------
139152
@@ -322,7 +335,7 @@ static int xhc_register(void) {
322335 OBJ_RELEASE (var_enum_flag );
323336
324337 /* (All)reduce uniform chunks */
325- // ---------------------------
338+ // -----------------------------
326339
327340 mca_base_component_var_register (& mca_coll_xhc_component .super .collm_version ,
328341 "uniform_chunks" , "Automatically optimize chunk size in reduction "
@@ -336,6 +349,15 @@ static int xhc_register(void) {
336349 NULL , 0 , 0 , OPAL_INFO_LVL_5 , MCA_BASE_VAR_SCOPE_READONLY ,
337350 & mca_coll_xhc_component .uniform_chunks_min );
338351
352+ /* Apply the op mca defaults. This must be done here rather than in-line in
353+ * the registration loops below, as some iterations are skipped, for the
354+ * variables that are not applicable (e.g. chunk size in Barrier). */
355+
356+ for (int t = 0 ; t < XHC_COLLCOUNT ; t ++ ) {
357+ mca_coll_xhc_component .op_mca [t ] = op_mca_default [t ];
358+ }
359+ mca_coll_xhc_component .op_mca_global = op_mca_global_default ;
360+
339361 /* Hierarchy */
340362 // ------------
341363
0 commit comments