Skip to content

Commit 74fd2c3

Browse files
author
Ralph Castain
committed
Cleanup alps odls module
Signed-off-by: Ralph Castain <rhc@open-mpi.org>
1 parent 75684dc commit 74fd2c3

File tree

1 file changed

+41
-54
lines changed

1 file changed

+41
-54
lines changed

orte/mca/odls/alps/odls_alps_module.c

Lines changed: 41 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -144,11 +144,7 @@ static int orte_odls_alps_restart_proc(orte_proc_t *child);
144144
static void send_error_show_help(int fd, int exit_status,
145145
const char *file, const char *topic, ...)
146146
__opal_attribute_noreturn__;
147-
static int do_child(orte_proc_t *child,
148-
char *app, char **argv,
149-
char **environ_copy,
150-
orte_job_t *jobdat, int write_fd,
151-
orte_iof_base_io_conf_t opts)
147+
static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd)
152148
__opal_attribute_noreturn__;
153149

154150

@@ -344,9 +340,8 @@ static int close_open_file_descriptors(int write_fd, orte_iof_base_io_conf_t opt
344340

345341
static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd)
346342
{
347-
int i, rc;
343+
int i;
348344
sigset_t sigs;
349-
char *param, *msg;
350345

351346
/* Set up the pipe to be close-on-exec */
352347
opal_fd_set_cloexec(write_fd);
@@ -449,20 +444,16 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd)
449444
}
450445

451446

452-
static int do_parent(orte_proc_t *child,
453-
char *app, char **argv,
454-
char **environ_copy,
455-
orte_job_t *jobdat, int read_fd,
456-
orte_iof_base_io_conf_t opts)
447+
static int do_parent(orte_odls_spawn_caddy_t *cd, int read_fd)
457448
{
458449
int rc;
459450
orte_odls_pipe_err_msg_t msg;
460451
char file[ORTE_ODLS_MAX_FILE_LEN + 1], topic[ORTE_ODLS_MAX_TOPIC_LEN + 1], *str = NULL;
461452

462-
close(opts.p_stdin[0]);
463-
close(opts.p_stdout[1]);
464-
close(opts.p_stderr[1]);
465-
close(opts.p_internal[1]);
453+
close(cd->opts.p_stdin[0]);
454+
close(cd->opts.p_stdout[1]);
455+
close(cd->opts.p_stderr[1]);
456+
close(cd->opts.p_internal[1]);
466457

467458
/* Block reading a message from the pipe */
468459
while (1) {
@@ -478,18 +469,18 @@ static int do_parent(orte_proc_t *child,
478469
ORTE_ERROR_LOG(rc);
479470
close(read_fd);
480471

481-
if (NULL != child) {
482-
child->state = ORTE_PROC_STATE_UNDEF;
472+
if (NULL != cd->child) {
473+
cd->child->state = ORTE_PROC_STATE_UNDEF;
483474
}
484475
return rc;
485476
}
486477

487478
/* Otherwise, we got a warning or error message from the child */
488-
if (NULL != child) {
479+
if (NULL != cd->child) {
489480
if (msg.fatal) {
490-
ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_ALIVE);
481+
ORTE_FLAG_UNSET(cd->child, ORTE_PROC_FLAG_ALIVE);
491482
} else {
492-
ORTE_FLAG_SET(child, ORTE_PROC_FLAG_ALIVE);
483+
ORTE_FLAG_SET(cd->child, ORTE_PROC_FLAG_ALIVE);
493484
}
494485
}
495486

@@ -499,10 +490,10 @@ static int do_parent(orte_proc_t *child,
499490
if (OPAL_SUCCESS != rc) {
500491
orte_show_help("help-orte-odls-alps.txt", "syscall fail",
501492
true,
502-
orte_process_info.nodename, app,
493+
orte_process_info.nodename, cd->app,
503494
"opal_fd_read", __FILE__, __LINE__);
504-
if (NULL != child) {
505-
child->state = ORTE_PROC_STATE_UNDEF;
495+
if (NULL != cd->child) {
496+
cd->child->state = ORTE_PROC_STATE_UNDEF;
506497
}
507498
return rc;
508499
}
@@ -513,10 +504,10 @@ static int do_parent(orte_proc_t *child,
513504
if (OPAL_SUCCESS != rc) {
514505
orte_show_help("help-orte-odls-alps.txt", "syscall fail",
515506
true,
516-
orte_process_info.nodename, app,
507+
orte_process_info.nodename, cd->app,
517508
"opal_fd_read", __FILE__, __LINE__);
518-
if (NULL != child) {
519-
child->state = ORTE_PROC_STATE_UNDEF;
509+
if (NULL != cd->child) {
510+
cd->child->state = ORTE_PROC_STATE_UNDEF;
520511
}
521512
return rc;
522513
}
@@ -527,10 +518,10 @@ static int do_parent(orte_proc_t *child,
527518
if (NULL == str) {
528519
orte_show_help("help-orte-odls-alps.txt", "syscall fail",
529520
true,
530-
orte_process_info.nodename, app,
521+
orte_process_info.nodename, cd->app,
531522
"opal_fd_read", __FILE__, __LINE__);
532-
if (NULL != child) {
533-
child->state = ORTE_PROC_STATE_UNDEF;
523+
if (NULL != cd->child) {
524+
cd->child->state = ORTE_PROC_STATE_UNDEF;
534525
}
535526
return rc;
536527
}
@@ -551,9 +542,9 @@ static int do_parent(orte_proc_t *child,
551542
closed, indicating that the child launched
552543
successfully). */
553544
if (msg.fatal) {
554-
if (NULL != child) {
555-
child->state = ORTE_PROC_STATE_FAILED_TO_START;
556-
ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_ALIVE);
545+
if (NULL != cd->child) {
546+
cd->child->state = ORTE_PROC_STATE_FAILED_TO_START;
547+
ORTE_FLAG_UNSET(cd->child, ORTE_PROC_FLAG_ALIVE);
557548
}
558549
close(read_fd);
559550
return ORTE_ERR_FAILED_TO_START;
@@ -563,9 +554,9 @@ static int do_parent(orte_proc_t *child,
563554
/* If we got here, it means that the pipe closed without
564555
indication of a fatal error, meaning that the child process
565556
launched successfully. */
566-
if (NULL != child) {
567-
child->state = ORTE_PROC_STATE_RUNNING;
568-
ORTE_FLAG_SET(child, ORTE_PROC_FLAG_ALIVE);
557+
if (NULL != cd->child) {
558+
cd->child->state = ORTE_PROC_STATE_RUNNING;
559+
ORTE_FLAG_SET(cd->child, ORTE_PROC_FLAG_ALIVE);
569560
}
570561
close(read_fd);
571562

@@ -576,14 +567,10 @@ static int do_parent(orte_proc_t *child,
576567
/**
577568
* Fork/exec the specified processes
578569
*/
579-
static int odls_alps_fork_local_proc(orte_proc_t *child,
580-
char *app,
581-
char **argv,
582-
char **environ_copy,
583-
orte_job_t *jobdat,
584-
orte_iof_base_io_conf_t opts)
570+
static int odls_alps_fork_local_proc(void *cdptr)
585571
{
586-
int rc, p[2];
572+
orte_odls_spawn_caddy_t *cd = (orte_odls_spawn_caddy_t*)cdptr;
573+
int p[2];
587574
pid_t pid;
588575

589576
/* A pipe is used to communicate between the parent and child to
@@ -596,24 +583,24 @@ static int odls_alps_fork_local_proc(orte_proc_t *child,
596583
the pipe, then the child was letting us know why it failed. */
597584
if (pipe(p) < 0) {
598585
ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES);
599-
if (NULL != child) {
600-
child->state = ORTE_PROC_STATE_FAILED_TO_START;
601-
child->exit_code = ORTE_ERR_SYS_LIMITS_PIPES;
586+
if (NULL != cd->child) {
587+
cd->child->state = ORTE_PROC_STATE_FAILED_TO_START;
588+
cd->child->exit_code = ORTE_ERR_SYS_LIMITS_PIPES;
602589
}
603590
return ORTE_ERR_SYS_LIMITS_PIPES;
604591
}
605592

606593
/* Fork off the child */
607594
pid = fork();
608-
if (NULL != child) {
609-
child->pid = pid;
595+
if (NULL != cd->child) {
596+
cd->child->pid = pid;
610597
}
611598

612599
if (pid < 0) {
613600
ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_CHILDREN);
614-
if (NULL != child) {
615-
child->state = ORTE_PROC_STATE_FAILED_TO_START;
616-
child->exit_code = ORTE_ERR_SYS_LIMITS_CHILDREN;
601+
if (NULL != cd->child) {
602+
cd->child->state = ORTE_PROC_STATE_FAILED_TO_START;
603+
cd->child->exit_code = ORTE_ERR_SYS_LIMITS_CHILDREN;
617604
}
618605
return ORTE_ERR_SYS_LIMITS_CHILDREN;
619606
}
@@ -623,12 +610,12 @@ static int odls_alps_fork_local_proc(orte_proc_t *child,
623610
#if HAVE_SETPGID
624611
setpgid(0, 0);
625612
#endif
626-
do_child(child, app, argv, environ_copy, jobdat, p[1], opts);
613+
do_child(cd, p[1]);
627614
/* Does not return */
628615
}
629616

630617
close(p[1]);
631-
return do_parent(child, app, argv, environ_copy, jobdat, p[0], opts);
618+
return do_parent(cd, p[0]);
632619
}
633620

634621

@@ -638,8 +625,8 @@ static int odls_alps_fork_local_proc(orte_proc_t *child,
638625

639626
int orte_odls_alps_launch_local_procs(opal_buffer_t *data)
640627
{
641-
int rc;
642628
orte_jobid_t job;
629+
int rc;
643630

644631
/* construct the list of children we are to launch */
645632
if (ORTE_SUCCESS != (rc = orte_odls_base_default_construct_child_list(data, &job))) {

0 commit comments

Comments
 (0)