2828 * All Rights Reserved.
2929 *
3030 * Portions of this code are Copyright 2011 Univa Inc.
31- *
32- * Portions of this software are Copyright (c) 2023-2024 HPC-Gridware GmbH
31+ *
32+ * Portions of this software are Copyright (c) 2023-2025 HPC-Gridware GmbH
3333 *
3434 ************************************************************************/
3535/* ___INFO__MARK_END__*/
@@ -69,6 +69,7 @@ static unsigned int g_pid = 0; /* set by main thread, read by worker thread */
6969static int g_raw_mode_state = 0 ; /* set by main thread, read by worker thread */
7070static int g_suspend_remote = 0 ; /* set by main thread, read by worker thread */
7171static COMM_HANDLE *g_comm_handle = nullptr ;
72+ static int g_wakeup_pipe[2 ] = { -1 , -1 }; /* pipe to wake up worker thread */
7273
7374/*
7475 * static volatile sig_atomic_t received_window_change_signal = 1;
@@ -147,7 +148,7 @@ static void broken_pipe_handler(int sig)
147148* static void signal_handler(int sig)
148149*
149150* FUNCTION
150- * Handler for all signals that quit the program. These signals are trapped
151+ * Handler for all signals that quit the program. These signals are trapped
151152* in order to restore the terminal modes.
152153*
153154* INPUTS
@@ -190,7 +191,7 @@ void set_signal_handlers()
190191 memset (&old_handler, 0 , sizeof (old_handler));
191192 memset (&new_handler, 0 , sizeof (new_handler));
192193
193- /* Is SIGHUP necessary?
194+ /* Is SIGHUP necessary?
194195 * Yes: termio(7I) says:
195196 * "When a modem disconnect is detected, a SIGHUP signal is sent
196197 * to the terminal's controlling process.
@@ -288,7 +289,7 @@ static void client_check_window_change(COMM_HANDLE *handle)
288289 if (received_window_change_signal) {
289290 /*
290291 * here we can have a race condition between the two working threads,
291- * but it doesn't matter - in the worst case, the new window size gets
292+ * but it doesn't matter - in the worst case, the new window size gets
292293 * submitted two times.
293294 */
294295 received_window_change_signal = 0 ;
@@ -340,8 +341,13 @@ void* tty_to_commlib(void *t_conf)
340341 DSTRING_STATIC (err_msg, MAX_STRING_SIZE);
341342
342343 thread_func_startup (t_conf);
343-
344- /*
344+
345+ // create a pipe to wake up this thread
346+ if (pipe (g_wakeup_pipe) < 0 ) {
347+ DPRINTF (" tty_to_commlib: pipe() failed: %s\n " , strerror (errno));
348+ }
349+
350+ /*
345351 * allocate working buffer
346352 */
347353 bool do_exit = false ;
@@ -352,28 +358,33 @@ void* tty_to_commlib(void *t_conf)
352358 /* wait for input on tty */
353359 FD_SET (STDIN_FILENO, &read_fds);
354360 }
361+ if (g_wakeup_pipe[0 ] != -1 ) {
362+ /* wait for input on wakeup pipe */
363+ FD_SET (g_wakeup_pipe[0 ], &read_fds);
364+ }
355365 struct timeval timeout;
356366 timeout.tv_sec = 1 ;
357367 timeout.tv_usec = 0 ;
358368
359369 if (received_signal == SIGCONT) {
360- received_signal = 0 ;
361- if (continue_handler (g_comm_handle, g_hostname) == 1 ) {
362- do_exit = true ;
363- continue ;
364- }
365- if (g_raw_mode_state == 1 ) {
366- /* restore raw-mode after SIGCONT */
367- if (terminal_enter_raw_mode () != 0 ) {
368- DPRINTF (" tty_to_commlib: couldn't enter raw mode for pty\n " );
369- do_exit = true ;
370- continue ;
370+ received_signal = 0 ;
371+ if (continue_handler (g_comm_handle, g_hostname) == 1 ) {
372+ do_exit = true ;
373+ continue ;
374+ }
375+ if (g_raw_mode_state == 1 ) {
376+ /* restore raw-mode after SIGCONT */
377+ if (terminal_enter_raw_mode () != 0 ) {
378+ DPRINTF (" tty_to_commlib: couldn't enter raw mode for pty\n " );
379+ do_exit = true ;
380+ continue ;
371381 }
372- }
373- }
374-
382+ }
383+ }
384+
375385 DPRINTF (" tty_to_commlib: Waiting in select() for data\n " );
376- int ret = select (STDIN_FILENO+1 , &read_fds, nullptr , nullptr , &timeout);
386+ int ret = select (MAX (STDIN_FILENO, g_wakeup_pipe[0 ]) + 1 , &read_fds, nullptr , nullptr , &timeout);
387+ DPRINTF (" select returned %d\n " , ret);
377388
378389 thread_testcancel (t_conf);
379390 client_check_window_change (g_comm_handle);
@@ -394,29 +405,36 @@ void* tty_to_commlib(void *t_conf)
394405 /* We should never get here if STDIN is closed */
395406 DPRINTF (" tty_to_commlib: STDIN ready to read while it should be closed!!!\n " );
396407 }
397- DPRINTF (" tty_to_commlib: trying to read() from stdin\n " );
398- int nread = read (STDIN_FILENO, pbuf, BUFSIZE-1 );
399- pbuf[nread] = ' \0 ' ;
400- sge_dstring_append (&dbuf, pbuf);
401- DPRINTF (" tty_to_commlib: nread = %d\n " , nread);
402-
403- if (nread < 0 && (errno == EINTR || errno == EAGAIN)) {
404- DPRINTF (" tty_to_commlib: EINTR or EAGAIN\n " );
405- /* do nothing */
406- } else if (nread <= 0 ) {
407- do_exit = true ;
408- } else {
409- DPRINTF (" tty_to_commlib: writing to commlib: %d bytes\n " , nread);
410- if (suspend_handler (g_comm_handle, g_hostname, g_is_rsh, g_suspend_remote, g_pid, &dbuf) == 1 ) {
411- if (comm_write_message (g_comm_handle, g_hostname,
412- COMM_CLIENT, 1 , (unsigned char *)pbuf,
413- (unsigned long )nread, STDIN_DATA_MSG, &err_msg) != (unsigned long )nread) {
414- DPRINTF (" tty_to_commlib: couldn't write all data\n " );
415- } else {
416- DPRINTF (" tty_to_commlib: data successfully written\n " );
417- }
408+ if (FD_ISSET (STDIN_FILENO, &read_fds)) {
409+ DPRINTF (" tty_to_commlib: trying to read() from stdin\n " );
410+ int nread = read (STDIN_FILENO, pbuf, BUFSIZE-1 );
411+ pbuf[nread] = ' \0 ' ;
412+ sge_dstring_append (&dbuf, pbuf);
413+ DPRINTF (" tty_to_commlib: nread = %d\n " , nread);
414+
415+ if (nread < 0 && (errno == EINTR || errno == EAGAIN)) {
416+ DPRINTF (" tty_to_commlib: EINTR or EAGAIN\n " );
417+ /* do nothing */
418+ } else if (nread <= 0 ) {
419+ do_exit = true ;
420+ } else {
421+ DPRINTF (" tty_to_commlib: writing to commlib: %d bytes\n " , nread);
422+ if (suspend_handler (g_comm_handle, g_hostname, g_is_rsh, g_suspend_remote, g_pid, &dbuf) == 1 ) {
423+ if (comm_write_message (g_comm_handle, g_hostname,
424+ COMM_CLIENT, 1 , (unsigned char *) pbuf,
425+ (unsigned long ) nread, STDIN_DATA_MSG, &err_msg) != (unsigned long ) nread) {
426+ DPRINTF (" tty_to_commlib: couldn't write all data\n " );
427+ } else {
428+ DPRINTF (" tty_to_commlib: data successfully written\n " );
429+ }
430+ }
431+ comm_flush_write_messages (g_comm_handle, &err_msg);
418432 }
419- comm_flush_write_messages (g_comm_handle, &err_msg);
433+ } else if (FD_ISSET (g_wakeup_pipe[0 ], &read_fds)) {
434+ // If we received something on the wakeup pipe, we shall exit.
435+ // We will probably never get here as thread_testcancel() above will already terminate the thread.
436+ DPRINTF (" wakeup pipe was triggered, exiting tty_to_commlib thread\n " );
437+ do_exit = true ;
420438 }
421439 sge_dstring_free (&dbuf);
422440 } else {
@@ -444,7 +462,7 @@ void* tty_to_commlib(void *t_conf)
444462
445463 /* clean up */
446464 thread_func_cleanup (t_conf);
447-
465+
448466 DPRINTF (" tty_to_commlib: exiting tty_to_commlib thread!\n " );
449467 DRETURN (nullptr );
450468}
@@ -559,9 +577,9 @@ void* commlib_to_tty(void *t_conf)
559577 break ;
560578 case REGISTER_CTRL_MSG:
561579 /* control message */
562- /* a client registered with us. With the next loop, the
580+ /* a client registered with us. With the next loop, the
563581 * cl_commlib_trigger function will send the WINDOW_SIZE_CTRL_MSG
564- * (and perhaps some data messages), which is already in the
582+ * (and perhaps some data messages), which is already in the
565583 * send_messages list of the connection, to the client.
566584 */
567585 DPRINTF (" commlib_to_tty: received register message!\n " );
@@ -574,7 +592,7 @@ void* commlib_to_tty(void *t_conf)
574592 case UNREGISTER_CTRL_MSG:
575593 /* control message */
576594 /* the client wants to quit, as this is the last message the client
577- * sends, we can be sure to have received all messages from the
595+ * sends, we can be sure to have received all messages from the
578596 * client. We answer with a UNREGISTER_RESPONSE_CTRL_MSG so
579597 * the client knows that it can quit now. We can quit, also.
580598 */
@@ -591,7 +609,7 @@ void* commlib_to_tty(void *t_conf)
591609 * If the job was signalled, the exit code is 128+signal.
592610 */
593611 sscanf (buf, " %d" , &g_exit_status);
594- comm_write_message (g_comm_handle, g_hostname, COMM_CLIENT, 1 ,
612+ comm_write_message (g_comm_handle, g_hostname, COMM_CLIENT, 1 ,
595613 (unsigned char *)" " , 1 , UNREGISTER_RESPONSE_CTRL_MSG, &err_msg);
596614
597615 DPRINTF (" commlib_to_tty: received exit_status from shepherd: %d\n " , g_exit_status);
@@ -747,9 +765,9 @@ int run_ijs_server(COMM_HANDLE *handle, const char *remote_host,
747765
748766 /*
749767 * From here on, the two worker threads are doing all the work.
750- * This main thread is just waiting until the client closes the
751- * connection to us, which causes the commlib_to_tty thread to
752- * exit. Then it closes the tty_to_commlib thread, too, and
768+ * This main thread is just waiting until the client closes the
769+ * connection to us, which causes the commlib_to_tty thread to
770+ * exit. Then it closes the tty_to_commlib thread, too, and
753771 * cleans up everything.
754772 */
755773 DPRINTF (" waiting for end of commlib_to_tty thread\n " );
@@ -758,14 +776,19 @@ int run_ijs_server(COMM_HANDLE *handle, const char *remote_host,
758776 DPRINTF (" shutting down tty_to_commlib thread\n " );
759777 thread_shutdown (pthread_tty_to_commlib);
760778
761- /*
762- * Close stdin to awake the tty_to_commlib-thread from the select() call.
763- * thread_shutdown() doesn't work on all architectures.
764- */
765- close (STDIN_FILENO);
779+ // write a byte into the wakeup pipe to wake up the tty_to_commlib thread
780+ if (g_wakeup_pipe[1 ] != -1 ) {
781+ DPRINTF (" tty_to_commlib: writing wakeup byte to pipe\n " );
782+ if (write (g_wakeup_pipe[1 ], " x" , 1 ) < 0 ) {
783+ DPRINTF (" tty_to_commlib: write() to wakeup pipe failed: %s\n " , strerror (errno));
784+ }
785+ } else {
786+ DPRINTF (" tty_to_commlib: g_wakeup_pipe[1] is -1, not writing wakeup byte\n " );
787+ }
766788
767789 DPRINTF (" waiting for end of tty_to_commlib thread\n " );
768790 thread_join (pthread_tty_to_commlib);
791+ DPRINTF (" tty_to_commlib thread terminated\n " );
769792cleanup:
770793 /*
771794 * Set our terminal back to 'unraw' mode. Should be done automatically
@@ -775,7 +798,7 @@ int run_ijs_server(COMM_HANDLE *handle, const char *remote_host,
775798 DPRINTF (" terminal_leave_raw_mode() returned %s (%d)\n " , strerror (ret), ret);
776799 if (ret != 0 ) {
777800 sge_dstring_sprintf (p_err_msg, " error resetting terminal mode: %s (%d)" , strerror (ret), ret);
778- ret_val = 7 ;
801+ ret_val = 7 ;
779802 }
780803
781804 *p_exit_status = g_exit_status;
@@ -795,7 +818,7 @@ int run_ijs_server(COMM_HANDLE *handle, const char *remote_host,
795818*
796819* FUNCTION
797820* Starts the commlib server for the commlib connection between the shepherd
798- * of the interactive job (qrsh/qlogin) and the qrsh/qlogin command.
821+ * of the interactive job (qrsh/qlogin) and the qrsh/qlogin command.
799822* Over this connection the stdin/stdout/stderr input/output is transferred.
800823*
801824* INPUTS
@@ -815,7 +838,7 @@ int run_ijs_server(COMM_HANDLE *handle, const char *remote_host,
815838* 2: Can't set connection parameters
816839*
817840* NOTES
818- * MT-NOTE: start_builtin_ijs_server() is not MT safe
841+ * MT-NOTE: start_builtin_ijs_server() is not MT safe
819842*
820843* SEE ALSO
821844* sge_client_ijs/run_ijs_server()
@@ -829,7 +852,7 @@ int start_ijs_server(bool csp_mode, const char* username,
829852
830853 DENTER (TOP_LAYER);
831854
832- /* we must copy the hostname here to a global variable, because the
855+ /* we must copy the hostname here to a global variable, because the
833856 * worker threads need it later.
834857 * It gets freed in force_ijs_server_shutdown().
835858 * TODO: Cleaner solution for this!
@@ -856,7 +879,7 @@ int start_ijs_server(bool csp_mode, const char* username,
856879* interactive job support
857880*
858881* SYNOPSIS
859- * int stop_ijs_server(COMM_HANDLE **phandle, dstring *p_err_msg)
882+ * int stop_ijs_server(COMM_HANDLE **phandle, dstring *p_err_msg)
860883*
861884* FUNCTION
862885* Stops the commlib server for the commlib connection between the shepherd
@@ -875,7 +898,7 @@ int start_ijs_server(bool csp_mode, const char* username,
875898* see p_err_msg for details
876899*
877900* NOTES
878- * MT-NOTE: stop_ijs_server() is not MT safe
901+ * MT-NOTE: stop_ijs_server() is not MT safe
879902*
880903* SEE ALSO
881904* sge_client_ijs/start_ijs_server()
@@ -914,8 +937,8 @@ int stop_ijs_server(COMM_HANDLE **phandle, dstring *p_err_msg)
914937* interactive job support to shut down
915938*
916939* SYNOPSIS
917- * int force_ijs_server_shutdown(COMM_HANDLE **phandle, const char
918- * *this_component, dstring *p_err_msg)
940+ * int force_ijs_server_shutdown(COMM_HANDLE **phandle, const char
941+ * *this_component, dstring *p_err_msg)
919942*
920943* FUNCTION
921944* Forces the commlib server for the builtin interactive job support to shut
@@ -933,7 +956,7 @@ int stop_ijs_server(COMM_HANDLE **phandle, dstring *p_err_msg)
933956* 2: Can't shut down connection, see p_err_msg for details
934957*
935958* NOTES
936- * MT-NOTE: force_ijs_server_shutdown() is not MT safe
959+ * MT-NOTE: force_ijs_server_shutdown() is not MT safe
937960*
938961* SEE ALSO
939962* sge_client_ijs/start_ijs_server()
0 commit comments