@@ -385,6 +385,7 @@ int orte_util_decode_nidmap(opal_buffer_t *buf)
385385 /* add this name to the pool */
386386 nd = OBJ_NEW (orte_node_t );
387387 nd -> name = strdup (names [n ]);
388+ nd -> index = n ;
388389 opal_pointer_array_set_item (orte_node_pool , n , nd );
389390 /* set the topology - always default to homogeneous
390391 * as that is the most common scenario */
@@ -409,7 +410,6 @@ int orte_util_decode_nidmap(opal_buffer_t *buf)
409410 daemons -> num_procs ++ ;
410411 opal_pointer_array_set_item (daemons -> procs , proc -> name .vpid , proc );
411412 }
412- nd -> index = proc -> name .vpid ;
413413 OBJ_RETAIN (nd );
414414 proc -> node = nd ;
415415 OBJ_RETAIN (proc );
@@ -945,8 +945,9 @@ int orte_util_parse_node_info(opal_buffer_t *buf)
945945int orte_util_generate_ppn (orte_job_t * jdata ,
946946 opal_buffer_t * buf )
947947{
948- uint16_t * ppn = NULL ;
949- size_t nbytes ;
948+ uint16_t ppn ;
949+ uint8_t * bytes ;
950+ int32_t nbytes ;
950951 int rc = ORTE_SUCCESS ;
951952 orte_app_idx_t i ;
952953 int j , k ;
@@ -955,40 +956,47 @@ int orte_util_generate_ppn(orte_job_t *jdata,
955956 orte_node_t * nptr ;
956957 orte_proc_t * proc ;
957958 size_t sz ;
959+ opal_buffer_t bucket ;
958960
959- /* make room for the number of procs on each node */
960- nbytes = sizeof (uint16_t ) * orte_node_pool -> size ;
961- ppn = (uint16_t * )malloc (nbytes );
961+ OBJ_CONSTRUCT (& bucket , opal_buffer_t );
962962
963963 for (i = 0 ; i < jdata -> num_apps ; i ++ ) {
964- /* reset the #procs */
965- memset (ppn , 0 , nbytes );
966- /* for each app_context, compute the #procs on
967- * each node of the allocation */
968- for (j = 0 ; j < orte_node_pool -> size ; j ++ ) {
969- if (NULL == (nptr = (orte_node_t * )opal_pointer_array_get_item (orte_node_pool , j ))) {
964+ /* for each app_context */
965+ for (j = 0 ; j < jdata -> map -> nodes -> size ; j ++ ) {
966+ if (NULL == (nptr = (orte_node_t * )opal_pointer_array_get_item (jdata -> map -> nodes , j ))) {
970967 continue ;
971968 }
972969 if (NULL == nptr -> daemon ) {
973970 continue ;
974971 }
972+ ppn = 0 ;
975973 for (k = 0 ; k < nptr -> procs -> size ; k ++ ) {
976974 if (NULL != (proc = (orte_proc_t * )opal_pointer_array_get_item (nptr -> procs , k ))) {
977975 if (proc -> name .jobid == jdata -> jobid ) {
978- ++ ppn [ j ] ;
976+ ++ ppn ;
979977 }
980978 }
981979 }
980+ if (0 < ppn ) {
981+ if (ORTE_SUCCESS != (rc = opal_dss .pack (& bucket , & nptr -> index , 1 , ORTE_STD_CNTR ))) {
982+ goto cleanup ;
983+ }
984+ if (ORTE_SUCCESS != (rc = opal_dss .pack (& bucket , & ppn , 1 , OPAL_UINT16 ))) {
985+ goto cleanup ;
986+ }
987+ }
982988 }
983- if (opal_compress .compress_block ((uint8_t * )ppn , nbytes ,
989+ opal_dss .unload (& bucket , (void * * )& bytes , & nbytes );
990+
991+ if (opal_compress .compress_block (bytes , (size_t )nbytes ,
984992 (uint8_t * * )& bo .bytes , & sz )) {
985993 /* mark that this was compressed */
986994 compressed = true;
987995 bo .size = sz ;
988996 } else {
989997 /* mark that this was not compressed */
990998 compressed = false;
991- bo .bytes = ( uint8_t * ) ppn ;
999+ bo .bytes = bytes ;
9921000 bo .size = nbytes ;
9931001 }
9941002 /* indicate compression */
@@ -1015,21 +1023,31 @@ int orte_util_generate_ppn(orte_job_t *jdata,
10151023 }
10161024
10171025 cleanup :
1018- free ( ppn );
1026+ OBJ_DESTRUCT ( & bucket );
10191027 return rc ;
10201028}
10211029
10221030int orte_util_decode_ppn (orte_job_t * jdata ,
10231031 opal_buffer_t * buf )
10241032{
1033+ orte_std_cntr_t index ;
10251034 orte_app_idx_t n ;
1026- int m , cnt , rc ;
1035+ int cnt , rc ;
10271036 opal_byte_object_t * boptr ;
10281037 bool compressed ;
1038+ uint8_t * bytes ;
10291039 size_t sz ;
1030- uint16_t * ppn , k ;
1040+ uint16_t ppn , k ;
10311041 orte_node_t * node ;
10321042 orte_proc_t * proc ;
1043+ opal_buffer_t bucket ;
1044+
1045+ /* reset any flags */
1046+ for (n = 0 ; n < orte_node_pool -> size ; n ++ ) {
1047+ if (NULL != (node = (orte_node_t * )opal_pointer_array_get_item (orte_node_pool , n ))) {
1048+ ORTE_FLAG_UNSET (node , ORTE_NODE_FLAG_MAPPED );
1049+ }
1050+ }
10331051
10341052 for (n = 0 ; n < jdata -> num_apps ; n ++ ) {
10351053 /* unpack the compression flag */
@@ -1062,14 +1080,14 @@ int orte_util_decode_ppn(orte_job_t *jdata,
10621080
10631081 /* decompress if required */
10641082 if (compressed ) {
1065- if (!opal_compress .decompress_block (( uint8_t * * ) & ppn , sz ,
1083+ if (!opal_compress .decompress_block (& bytes , sz ,
10661084 boptr -> bytes , boptr -> size )) {
10671085 ORTE_ERROR_LOG (ORTE_ERROR );
10681086 OBJ_RELEASE (boptr );
10691087 return ORTE_ERROR ;
10701088 }
10711089 } else {
1072- ppn = ( uint16_t * ) boptr -> bytes ;
1090+ bytes = boptr -> bytes ;
10731091 boptr -> bytes = NULL ;
10741092 boptr -> size = 0 ;
10751093 }
@@ -1078,38 +1096,67 @@ int orte_util_decode_ppn(orte_job_t *jdata,
10781096 }
10791097 free (boptr );
10801098
1081- /* cycle thru the node pool */
1082- for (m = 0 ; m < orte_node_pool -> size ; m ++ ) {
1083- if (NULL == (node = (orte_node_t * )opal_pointer_array_get_item (orte_node_pool , m ))) {
1084- continue ;
1099+ /* setup to unpack */
1100+ OBJ_CONSTRUCT (& bucket , opal_buffer_t );
1101+ opal_dss .load (& bucket , bytes , sz );
1102+
1103+ /* unpack each node and its ppn */
1104+ cnt = 1 ;
1105+ while (OPAL_SUCCESS == (rc = opal_dss .unpack (& bucket , & index , & cnt , ORTE_STD_CNTR ))) {
1106+ /* get the corresponding node object */
1107+ if (NULL == (node = (orte_node_t * )opal_pointer_array_get_item (orte_node_pool , index ))) {
1108+ rc = ORTE_ERR_NOT_FOUND ;
1109+ ORTE_ERROR_LOG (rc );
1110+ goto error ;
10851111 }
1086- if (0 < ppn [m ]) {
1087- if (!ORTE_FLAG_TEST (node , ORTE_NODE_FLAG_MAPPED )) {
1088- OBJ_RETAIN (node );
1089- ORTE_FLAG_SET (node , ORTE_NODE_FLAG_MAPPED );
1090- opal_pointer_array_add (jdata -> map -> nodes , node );
1091- }
1092- /* create a proc object for each one */
1093- for (k = 0 ; k < ppn [m ]; k ++ ) {
1094- proc = OBJ_NEW (orte_proc_t );
1095- proc -> name .jobid = jdata -> jobid ;
1096- /* leave the vpid undefined as this will be determined
1097- * later when we do the overall ranking */
1098- proc -> app_idx = n ;
1099- proc -> parent = node -> daemon -> name .vpid ;
1100- OBJ_RETAIN (node );
1101- proc -> node = node ;
1102- /* flag the proc as ready for launch */
1103- proc -> state = ORTE_PROC_STATE_INIT ;
1104- opal_pointer_array_add (node -> procs , proc );
1105- /* we will add the proc to the jdata array when we
1106- * compute its rank */
1107- }
1108- node -> num_procs += ppn [m ];
1112+ /* add the node to the job map if not already assigned */
1113+ if (!ORTE_FLAG_TEST (node , ORTE_NODE_FLAG_MAPPED )) {
1114+ OBJ_RETAIN (node );
1115+ opal_pointer_array_add (jdata -> map -> nodes , node );
1116+ ORTE_FLAG_SET (node , ORTE_NODE_FLAG_MAPPED );
1117+ }
1118+ /* get the ppn */
1119+ cnt = 1 ;
1120+ if (OPAL_SUCCESS != (rc = opal_dss .unpack (& bucket , & ppn , & cnt , OPAL_UINT16 ))) {
1121+ ORTE_ERROR_LOG (rc );
1122+ goto error ;
1123+ }
1124+ /* create a proc object for each one */
1125+ for (k = 0 ; k < ppn ; k ++ ) {
1126+ proc = OBJ_NEW (orte_proc_t );
1127+ proc -> name .jobid = jdata -> jobid ;
1128+ /* leave the vpid undefined as this will be determined
1129+ * later when we do the overall ranking */
1130+ proc -> app_idx = n ;
1131+ proc -> parent = node -> daemon -> name .vpid ;
1132+ OBJ_RETAIN (node );
1133+ proc -> node = node ;
1134+ /* flag the proc as ready for launch */
1135+ proc -> state = ORTE_PROC_STATE_INIT ;
1136+ opal_pointer_array_add (node -> procs , proc );
1137+ /* we will add the proc to the jdata array when we
1138+ * compute its rank */
11091139 }
1140+ node -> num_procs += ppn ;
1141+ cnt = 1 ;
11101142 }
1111- free (ppn );
1143+ OBJ_DESTRUCT (& bucket );
1144+ }
1145+
1146+ /* reset any flags */
1147+ for (n = 0 ; n < jdata -> map -> nodes -> size ; n ++ ) {
1148+ node = (orte_node_t * )opal_pointer_array_get_item (jdata -> map -> nodes , n );
1149+ ORTE_FLAG_UNSET (node , ORTE_NODE_FLAG_MAPPED );
11121150 }
11131151
11141152 return ORTE_SUCCESS ;
1153+
1154+ error :
1155+ OBJ_DESTRUCT (& bucket );
1156+ /* reset any flags */
1157+ for (n = 0 ; n < jdata -> map -> nodes -> size ; n ++ ) {
1158+ node = (orte_node_t * )opal_pointer_array_get_item (jdata -> map -> nodes , n );
1159+ ORTE_FLAG_UNSET (node , ORTE_NODE_FLAG_MAPPED );
1160+ }
1161+ return rc ;
11151162}
0 commit comments