@@ -23,6 +23,7 @@ pub struct Acquired {
2323impl Client {
2424 pub fn new ( limit : usize ) -> io:: Result < Client > {
2525 let client = unsafe { Client :: mk ( ) ? } ;
26+ client. configure_capacity ( limit) ?;
2627 // I don't think the character written here matters, but I could be
2728 // wrong!
2829 for _ in 0 ..limit {
@@ -63,6 +64,70 @@ impl Client {
6364 Ok ( Client :: from_fds ( pipes[ 0 ] , pipes[ 1 ] ) )
6465 }
6566
67+ fn configure_capacity ( & self , required_capacity : usize ) -> io:: Result < ( ) > {
68+ // On Linux we may need to increase the capacity of the pipe for the
69+ // jobserver to work correctly. Linux seems to exhibit behavior where it
70+ // implements a ring-buffer internally but apparently the ring-ness of
71+ // the ring-buffer is connected to *pages* of the ring buffer rather
72+ // than actual bytes of the ring buffer. This means that if the pipe has
73+ // only one page of capacity we can hit a possible deadlock situation
74+ // where a bunch of threads are writing to the pipe but they're all
75+ // blocked, despite the current used capacity of the pipe being less
76+ // than a page.
77+ //
78+ // This was first discovered in rust-lang/cargo#9739 where a system with
79+ // a large amount of concurrency would hang in `cargo build` when the
80+ // jobserver pipe only had one page of capacity. This was reduced to a
81+ // reproduction program [1] which indeed showed that the system would
82+ // deadlock if the capacity of the pipe was just one page.
83+ //
84+ // To fix this issue, on Linux only, we may increase the capacity of the
85+ // pipe. The main thing here is that if the capacity of the pipe is a
86+ // single page we try to increase it to two pages, otherwise we fail
87+ // because a deadlock might happen. While we're at it this goes ahead
88+ // and factors in the `required_capacity` requested by the client to
89+ // this calculation as well. If for some reason you want 10_000 units of
90+ // concurrency in the pipe that means we'll need more than 2 pages
91+ // (typically 8192 bytes), so we round that up to 3 pages as well.
92+ //
93+ // Someone with more understanding of linux pipes and how they buffer
94+ // internally should probably review this at some point. The exact cause
95+ // of the deadlock seems a little uncertain and it's not clear why the
96+ // example program [1] deadlocks and why simply adding another page
97+ // fixes things. Is this a kernel bug? Do we need to always guarantee at
98+ // least one free page? I'm not sure! Hopefully for now this is enough
99+ // to fix the problem until machines start having more than 4k cores,
100+ // which seems like it might be awhile.
101+ //
102+ // [1]: https://github.com/rust-lang/cargo/issues/9739#issuecomment-889183009
103+ #[ cfg( target_os = "linux" ) ]
104+ unsafe {
105+ let page_size = libc:: sysconf ( libc:: _SC_PAGESIZE) ;
106+ let actual_capacity = cvt ( libc:: fcntl ( self . write . as_raw_fd ( ) , libc:: F_GETPIPE_SZ ) ) ?;
107+
108+ if let Some ( c) = calculate_capacity (
109+ required_capacity,
110+ actual_capacity as usize ,
111+ page_size as usize ,
112+ ) {
113+ cvt ( libc:: fcntl ( self . write . as_raw_fd ( ) , libc:: F_SETPIPE_SZ , c) ) . map_err ( |e| {
114+ io:: Error :: new (
115+ e. kind ( ) ,
116+ format ! (
117+ "failed to increase jobserver pipe capacity from {} to {}; \
118+ jobserver otherwise might deadlock",
119+ actual_capacity, c,
120+ ) ,
121+ )
122+
123+ // ...
124+ } ) ?;
125+ }
126+ }
127+
128+ Ok ( ( ) )
129+ }
130+
66131 pub unsafe fn open ( s : & str ) -> Option < Client > {
67132 let mut parts = s. splitn ( 2 , ',' ) ;
68133 let read = parts. next ( ) . unwrap ( ) ;
@@ -337,3 +402,44 @@ extern "C" fn sigusr1_handler(
337402) {
338403 // nothing to do
339404}
405+
406+ #[ allow( dead_code) ]
407+ fn calculate_capacity (
408+ required_capacity : usize ,
409+ actual_capacity : usize ,
410+ page_size : usize ,
411+ ) -> Option < usize > {
412+ if actual_capacity < required_capacity {
413+ let mut rounded_capacity = round_up_to ( required_capacity, page_size) ;
414+ if rounded_capacity < page_size * 2 {
415+ rounded_capacity += page_size;
416+ }
417+ return Some ( rounded_capacity) ;
418+ }
419+
420+ if actual_capacity <= page_size {
421+ return Some ( page_size * 2 ) ;
422+ }
423+
424+ return None ;
425+
426+ fn round_up_to ( a : usize , b : usize ) -> usize {
427+ assert ! ( b. is_power_of_two( ) ) ;
428+ ( a + ( b - 1 ) ) & ( !( b - 1 ) )
429+ }
430+ }
431+
432+ #[ cfg( test) ]
433+ mod tests {
434+ use super :: calculate_capacity;
435+
436+ #[ test]
437+ fn test_calculate_capacity ( ) {
438+ assert_eq ! ( calculate_capacity( 1 , 65536 , 4096 ) , None ) ;
439+ assert_eq ! ( calculate_capacity( 500 , 65536 , 4096 ) , None ) ;
440+ assert_eq ! ( calculate_capacity( 5000 , 4096 , 4096 ) , Some ( 8192 ) ) ;
441+ assert_eq ! ( calculate_capacity( 1 , 4096 , 4096 ) , Some ( 8192 ) ) ;
442+ assert_eq ! ( calculate_capacity( 4096 , 4096 , 4096 ) , Some ( 8192 ) ) ;
443+ assert_eq ! ( calculate_capacity( 8192 , 4096 , 4096 ) , Some ( 8192 ) ) ;
444+ }
445+ }
0 commit comments