@@ -16,46 +16,52 @@ package vm
1616import (
1717 "context"
1818 "net"
19+ "strings"
1920 "time"
2021
2122 "github.com/mdlayher/vsock"
2223 "github.com/sirupsen/logrus"
2324)
2425
// vsockConnectTimeout is the overall deadline applied (on top of the caller's context) to the
// retry loops in VSockDial and vsockAccept.
const vsockConnectTimeout = 20 * time.Second
29+
2530// VSockDial attempts to connect to a vsock listener at the provided cid and port with a hardcoded number
2631// of retries.
2732func VSockDial (reqCtx context.Context , logger * logrus.Entry , contextID , port uint32 ) (net.Conn , error ) {
28- // VM should start within 200ms, vsock dial will make retries at 100ms, 200ms, 400ms, 800ms, 1.6s, 3.2s, 6.4s
29- const (
30- retryCount = 7
31- initialDelay = 100 * time .Millisecond
32- delayMultiplier = 2
33- )
33+ // Retries occur every 100ms up to vsockConnectTimeout
34+ const retryInterval = 100 * time .Millisecond
35+ ctx , cancel := context .WithTimeout (reqCtx , vsockConnectTimeout )
36+ defer cancel ()
3437
35- var lastErr error
36- var currentDelay = initialDelay
38+ var attemptCount int
39+ for range time .NewTicker (retryInterval ).C {
40+ attemptCount ++
41+ logger = logger .WithField ("attempt" , attemptCount )
3742
38- for i := 1 ; i <= retryCount ; i ++ {
3943 select {
40- case <- reqCtx .Done ():
41- return nil , reqCtx .Err ()
44+ case <- ctx .Done ():
45+ return nil , ctx .Err ()
4246 default :
4347 conn , err := vsock .Dial (contextID , port )
4448 if err == nil {
45- logger .WithField ("connection" , conn ).Debug ("Dial succeeded" )
49+ logger .WithField ("connection" , conn ).Debug ("vsock dial succeeded" )
4650 return conn , nil
4751 }
4852
49- logger .WithError (err ).Warnf ("vsock dial failed (attempt %d of %d), will retry in %s" , i , retryCount , currentDelay )
50- time .Sleep (currentDelay )
53+ // ENXIO and ECONNRESET can be returned while the VM+agent are still in the midst of booting
54+ if isTemporaryNetErr (err ) || isENXIO (err ) || isECONNRESET (err ) {
55+ logger .WithError (err ).Debug ("temporary vsock dial failure" )
56+ continue
57+ }
5158
52- lastErr = err
53- currentDelay *= delayMultiplier
59+ logger . WithError ( err ). Error ( "non-temporary vsock dial failure" )
60+ return nil , err
5461 }
5562 }
5663
57- logger .WithError (lastErr ).WithFields (logrus.Fields {"context_id" : contextID , "port" : port }).Error ("vsock dial failed" )
58- return nil , lastErr
64+ panic ("unreachable code" ) // appeases the compiler, which doesn't know the for loop is infinite
5965}
6066
6167// VSockDialConnector provides an IOConnector interface to the VSockDial function.
@@ -77,6 +83,47 @@ func VSockDialConnector(contextID, port uint32) IOConnector {
7783 }
7884}
7985
86+ func vsockAccept (reqCtx context.Context , logger * logrus.Entry , port uint32 ) (net.Conn , error ) {
87+ listener , err := vsock .Listen (port )
88+ if err != nil {
89+ return nil , err
90+ }
91+
92+ defer listener .Close ()
93+
94+ // Retries occur every 10ms up to vsockConnectTimeout
95+ const retryInterval = 10 * time .Millisecond
96+ ctx , cancel := context .WithTimeout (reqCtx , vsockConnectTimeout )
97+ defer cancel ()
98+
99+ var attemptCount int
100+ for range time .NewTicker (retryInterval ).C {
101+ attemptCount ++
102+ logger = logger .WithField ("attempt" , attemptCount )
103+
104+ select {
105+ case <- ctx .Done ():
106+ return nil , ctx .Err ()
107+ default :
108+ // accept is non-blocking so try to accept until we get a connection
109+ conn , err := listener .Accept ()
110+ if err == nil {
111+ return conn , nil
112+ }
113+
114+ if isTemporaryNetErr (err ) {
115+ logger .WithError (err ).Debug ("temporary stdio vsock accept failure" )
116+ continue
117+ }
118+
119+ logger .WithError (err ).Error ("non-temporary stdio vsock accept failure" )
120+ return nil , err
121+ }
122+ }
123+
124+ panic ("unreachable code" ) // appeases the compiler, which doesn't know the for loop is infinite
125+ }
126+
80127// VSockAcceptConnector provides an IOConnector that establishes the connection by listening on the provided
81128// vsock port and accepting the first connection that comes in.
82129func VSockAcceptConnector (port uint32 ) IOConnector {
@@ -86,47 +133,11 @@ func VSockAcceptConnector(port uint32) IOConnector {
86133 go func () {
87134 defer close (returnCh )
88135
89- listener , err := vsock .Listen (port )
90- if err != nil {
91- returnCh <- IOConnectorResult {
92- Err : err ,
93- }
94- return
95- }
96-
97- defer listener .Close ()
98-
99- for range time .NewTicker (10 * time .Millisecond ).C {
100- select {
101- case <- procCtx .Done ():
102- returnCh <- IOConnectorResult {
103- Err : procCtx .Err (),
104- }
105- return
106- default :
107- // accept is non-blocking so try to accept until we get a connection
108- conn , err := listener .Accept ()
109- if err == nil {
110- returnCh <- IOConnectorResult {
111- ReadWriteCloser : conn ,
112- }
113- return
114- }
115-
116- if isTemporaryNetErr (err ) {
117- logger .WithError (err ).Debug ("temporary stdio vsock accept failure" )
118- continue
119- }
120-
121- logger .WithError (err ).Error ("non-temporary stdio vsock accept failure" )
122- returnCh <- IOConnectorResult {
123- Err : err ,
124- }
125- return
126- }
136+ conn , err := vsockAccept (procCtx , logger , port )
137+ returnCh <- IOConnectorResult {
138+ ReadWriteCloser : conn ,
139+ Err : err ,
127140 }
128-
129- panic ("unreachable code" ) // appeases the compiler, which doesn't know the for loop is infinite
130141 }()
131142
132143 return returnCh
@@ -140,3 +151,16 @@ func isTemporaryNetErr(err error) bool {
140151
141152 return err != nil && ok && terr .Temporary ()
142153}
154+
155+ // Unfortunately, as "documented" on various online forums, there's no ideal way to
156+ // test for actual Linux error codes returned by the net library or wrappers
157+ // around that library. The common approach is to fall back on string matching,
158+ // which is done for the functions below
159+
// isENXIO reports whether err is non-nil and its message ends with "no such device".
// A nil err yields false rather than a nil-dereference panic.
//
// NOTE(review): on Linux, Go's errno text for ENXIO appears to be "no such device or address",
// while "no such device" is the text for ENODEV — confirm which errno the vsock dial surfaces.
func isENXIO(err error) bool {
	return err != nil && strings.HasSuffix(err.Error(), "no such device")
}
163+
// isECONNRESET reports whether err is non-nil and its message ends with
// "connection reset by peer". A nil err yields false rather than a nil-dereference panic.
func isECONNRESET(err error) bool {
	return err != nil && strings.HasSuffix(err.Error(), "connection reset by peer")
}
0 commit comments