@@ -62,10 +62,12 @@ void resume_other_threads();
6262void take_thread_lock ();
6363void release_thread_lock ();
6464void notify_paused ();
65+ uint32_t currently_paused ();
6566void wait_paused (uint32_t expected, const struct timespec *timeout);
6667int memserver_start ();
6768int memserver_entry (void *);
6869bool run_backtracer (int fd);
70+ void format_unsigned (unsigned u, char buffer[22 ]);
6971
7072ssize_t safe_read (int fd, void *buf, size_t len) {
7173 uint8_t *ptr = (uint8_t *)buf;
@@ -88,7 +90,7 @@ ssize_t safe_read(int fd, void *buf, size_t len) {
8890}
8991
9092ssize_t safe_write (int fd, const void *buf, size_t len) {
91- const uint8_t *ptr = (uint8_t *)buf;
93+ const uint8_t *ptr = (const uint8_t *)buf;
9294 const uint8_t *end = ptr + len;
9395 ssize_t total = 0 ;
9496
@@ -308,6 +310,124 @@ getdents(int fd, void *buf, size_t bufsiz)
308310 return syscall (SYS_getdents64, fd, buf, bufsiz);
309311}
310312
313+ /* Find the signal to use to suspend the given thread.
314+
315+ Sadly, libdispatch blocks SIGUSR1, so we can't just use that everywhere;
316+ and on Ubuntu 20.04 *something* is starting a thread with SIGPROF blocked,
317+ so we can't just use that either.
318+
319+ We also can't modify the signal mask for another thread, since there is
320+ no syscall to do that.
321+
322+ As a workaround, read /proc/<pid>/task/<tid>/status to find the signal
323+ mask so that we can decide which signal to try and send. */
324+ int
325+ signal_for_suspend (int pid, int tid)
326+ {
327+ char pid_buffer[22 ];
328+ char tid_buffer[22 ];
329+
330+ format_unsigned ((unsigned )pid, pid_buffer);
331+ format_unsigned ((unsigned )tid, tid_buffer);
332+
333+ char status_file[6 + 22 + 6 + 22 + 7 + 1 ];
334+
335+ strcpy (status_file, " /proc/" ); // 6
336+ strcat (status_file, pid_buffer); // 22
337+ strcat (status_file, " /task/" ); // 6
338+ strcat (status_file, tid_buffer); // 22
339+ strcat (status_file, " /status" ); // 7 + 1 for NUL
340+
341+ int fd = open (status_file, O_RDONLY);
342+ if (fd < 0 )
343+ return -1 ;
344+
345+ enum match_state {
346+ Matching,
347+ EatLine,
348+ AfterMatch,
349+ InHex,
350+
351+ // states after this terminate the loop
352+ Done,
353+ Bad
354+ };
355+
356+ enum match_state state = Matching;
357+ const char *toMatch = " SigBlk:" ;
358+ const char *matchPtr = toMatch;
359+ char buffer[256 ];
360+ uint64_t mask = 0 ;
361+ ssize_t count;
362+ while (state < Done && (count = read (fd, buffer, sizeof (buffer))) > 0 ) {
363+ char *ptr = buffer;
364+ char *end = buffer + count;
365+
366+ while (state < Done && ptr < end) {
367+ int ch = *ptr++;
368+
369+ switch (state) {
370+ case Matching:
371+ if (ch != *matchPtr) {
372+ state = EatLine;
373+ matchPtr = toMatch;
374+ } else if (!*++matchPtr) {
375+ state = AfterMatch;
376+ }
377+ break ;
378+ case EatLine:
379+ if (ch == ' \n ' )
380+ state = Matching;
381+ break ;
382+ case AfterMatch:
383+ if (ch == ' ' || ch == ' \t ' ) {
384+ break ;
385+ }
386+ state = InHex;
387+ SWIFT_FALLTHROUGH;
388+ case InHex:
389+ if (ch >= ' 0' && ch <= ' 9' ) {
390+ mask = (mask << 4 ) | (ch - ' 0' );
391+ } else if (ch >= ' a' && ch <= ' f' ) {
392+ mask = (mask << 4 ) | (ch - ' a' + 10 );
393+ } else if (ch >= ' A' && ch <= ' F' ) {
394+ mask = (mask << 4 ) | (ch - ' A' + 10 );
395+ } else if (ch == ' \n ' ) {
396+ state = Done;
397+ break ;
398+ } else {
399+ state = Bad;
400+ }
401+ break ;
402+ case Done:
403+ case Bad:
404+ break ;
405+ }
406+ }
407+ }
408+
409+ close (fd);
410+
411+ if (state == Done) {
412+ if (!(mask & (1 << (SIGUSR1 - 1 ))))
413+ return SIGUSR1;
414+ else if (!(mask & (1 << (SIGUSR2 - 1 ))))
415+ return SIGUSR2;
416+ else if (!(mask & (1 << (SIGPROF - 1 ))))
417+ return SIGPROF;
418+ else
419+ return -1 ;
420+ }
421+
422+ return -1 ;
423+ }
424+
// Write a string to stderr.
//
// Goes through safe_write() rather than a bare write() so that the result
// of the write is not silently dropped at this call site.  A NULL string is
// ignored.  Failures are deliberately not reported: there is nowhere else
// to send a diagnostic if stderr itself cannot be written.
void
warn(const char *str) {
  if (!str)
    return;

  (void)safe_write(STDERR_FILENO, str, strlen(str));
}
430+
311431/* Stop all other threads in this process; we do this by establishing a
312432 signal handler for SIGPROF, then iterating through the threads sending
313433 SIGPROF.
@@ -321,21 +441,23 @@ getdents(int fd, void *buf, size_t bufsiz)
321441void
322442suspend_other_threads (struct thread *self)
323443{
324- struct sigaction sa, sa_old ;
444+ struct sigaction sa, sa_old_prof, sa_old_usr1, sa_old_usr2 ;
325445
326446 // Take the lock
327447 take_thread_lock ();
328448
329449 // Start the thread list with this thread
330450 reset_threads (self);
331451
332- // Swap out the SIGPROF signal handler first
452+ // Swap out the signal handlers first
333453 sigfillset (&sa.sa_mask );
334- sa.sa_flags = SA_NODEFER ;
454+ sa.sa_flags = 0 ;
335455 sa.sa_handler = NULL ;
336456 sa.sa_sigaction = pause_thread;
337457
338- sigaction (SIGPROF, &sa, &sa_old);
458+ sigaction (SIGPROF, &sa, &sa_old_prof);
459+ sigaction (SIGUSR1, &sa, &sa_old_usr1);
460+ sigaction (SIGUSR2, &sa, &sa_old_usr2);
339461
340462 /* Now scan /proc/self/task to get the tids of the threads in this
341463 process. We need to ignore our own thread. */
@@ -346,11 +468,14 @@ suspend_other_threads(struct thread *self)
346468 size_t offset = 0 ;
347469 size_t count = 0 ;
348470
349- uint32_t thread_count = 0 ;
350- uint32_t old_thread_count ;
471+ unsigned max_loops = 15 ;
472+ uint32_t pending = 0 ;
351473
352474 do {
353- old_thread_count = thread_count;
475+ uint32_t paused = currently_paused ();
476+
477+ pending = 0 ;
478+
354479 lseek (fd, 0 , SEEK_SET);
355480
356481 for (;;) {
@@ -372,21 +497,35 @@ suspend_other_threads(struct thread *self)
372497 int tid = atoi (dp->d_name );
373498
374499 if ((int64_t )tid != self->tid && !seen_thread (tid)) {
375- tgkill (our_pid, tid, SIGPROF);
376- ++thread_count;
500+ int sig_to_use = signal_for_suspend (our_pid, tid);
501+
502+ if (sig_to_use > 0 ) {
503+ tgkill (our_pid, tid, sig_to_use);
504+ ++pending;
505+ } else {
506+ warn (" swift-runtime: unable to suspend thread " );
507+ warn (dp->d_name );
508+ warn (" \n " );
509+ }
377510 }
378511 }
379512
380- // Wait up to 5 seconds for the threads to pause
381- struct timespec timeout = { 5 , 0 };
382- wait_paused (thread_count, &timeout);
383- } while (old_thread_count != thread_count);
513+ // If we find no new threads, we're done
514+ if (!pending)
515+ break ;
516+
517+ // Wait for the threads to suspend
518+ struct timespec timeout = { 2 , 0 };
519+ wait_paused (paused + pending, &timeout);
520+ } while (max_loops--);
384521
385522 // Close the directory
386523 close (fd);
387524
388- // Finally, reset the signal handler
389- sigaction (SIGPROF, &sa_old, NULL );
525+ // Finally, reset the signal handlers
526+ sigaction (SIGPROF, &sa_old_prof, NULL );
527+ sigaction (SIGUSR1, &sa_old_usr1, NULL );
528+ sigaction (SIGUSR2, &sa_old_usr2, NULL );
390529}
391530
392531void
@@ -441,6 +580,12 @@ notify_paused()
441580 futex (&threads_paused, FUTEX_WAKE, 1 , NULL , NULL , 0 );
442581}
443582
583+ uint32_t
584+ currently_paused ()
585+ {
586+ return __atomic_load_n (&threads_paused, __ATOMIC_ACQUIRE);
587+ }
588+
444589void
445590wait_paused (uint32_t expected, const struct timespec *timeout)
446591{
0 commit comments