@@ -1828,7 +1828,7 @@ impl ProxyService {
18281828 ) ;
18291829 }
18301830 ResolveRouteOutput :: Response ( _) => unreachable ! ( ) ,
1831- ResolveRouteOutput :: CustomServe ( mut handlers ) => {
1831+ ResolveRouteOutput :: CustomServe ( mut handler ) => {
18321832 tracing:: debug!( %req_path, "Spawning task to handle WebSocket communication" ) ;
18331833 let mut request_context = request_context. clone ( ) ;
18341834 let req_headers = req_headers. clone ( ) ;
@@ -1845,7 +1845,7 @@ impl ProxyService {
18451845 . context ( "failed initiating websocket handle" ) ?;
18461846
18471847 loop {
1848- match handlers
1848+ match handler
18491849 . handle_websocket (
18501850 ws_handle. clone ( ) ,
18511851 & req_headers,
@@ -1895,18 +1895,26 @@ impl ProxyService {
18951895 Err ( err) => {
18961896 tracing:: debug!( ?err, "websocket handler error" ) ;
18971897
1898- // Denotes that the connection did not fail, but needs to be retried to
1899- // resole a new target
1900- let ws_retry = is_ws_retry ( & err) ;
1898+ // Denotes that the connection did not fail, but the downstream has closed
1899+ let ws_hibernate = is_ws_hibernate ( & err) ;
19011900
1902- if ws_retry {
1901+ if ws_hibernate {
19031902 attempts = 0 ;
19041903 } else {
19051904 attempts += 1 ;
19061905 }
19071906
1908- if attempts > max_attempts
1909- || ( !is_retryable_ws_error ( & err) && !ws_retry)
1907+ if ws_hibernate {
1908+ // After this function returns:
1909+ // - the route will be resolved again
1910+ // - the websocket will connect to the new downstream target
1911+ // - the gateway will continue reading messages from the client ws
1912+ // (starting with the message that caused the hibernation to end)
1913+ handler
1914+ . handle_websocket_hibernation ( ws_handle. clone ( ) )
1915+ . await ?;
1916+ } else if attempts > max_attempts
1917+ || !is_retryable_ws_error ( & err)
19101918 {
19111919 tracing:: debug!(
19121920 ?attempts,
@@ -1929,79 +1937,79 @@ impl ProxyService {
19291937
19301938 break ;
19311939 } else {
1932- if !ws_retry {
1933- let backoff = ProxyService :: calculate_backoff (
1934- attempts,
1935- initial_interval,
1936- ) ;
1940+ let backoff = ProxyService :: calculate_backoff (
1941+ attempts,
1942+ initial_interval,
1943+ ) ;
19371944
1938- tracing:: debug!(
1939- ?backoff,
1940- "WebSocket attempt {attempts} failed (service unavailable)"
1941- ) ;
1945+ tracing:: debug!(
1946+ ?backoff,
1947+ "WebSocket attempt {attempts} failed (service unavailable)"
1948+ ) ;
19421949
1943- tokio:: time:: sleep ( backoff) . await ;
1944- }
1950+ // Apply backoff for retryable error
1951+ tokio:: time:: sleep ( backoff) . await ;
1952+ }
19451953
1946- match state
1947- . resolve_route (
1948- & req_host,
1949- & req_path,
1950- & req_method,
1951- state. port_type . clone ( ) ,
1952- & req_headers,
1953- true ,
1954- )
1955- . await
1956- {
1957- Ok ( ResolveRouteOutput :: CustomServe ( new_handlers) ) => {
1958- handlers = new_handlers;
1959- continue ;
1960- }
1961- Ok ( ResolveRouteOutput :: Response ( response) ) => {
1962- ws_handle
1963- . send ( to_hyper_close ( Some ( str_to_close_frame (
1964- response. message . as_ref ( ) ,
1965- ) ) ) )
1966- . await ?;
1967-
1968- // Flush to ensure close frame is sent
1969- ws_handle. flush ( ) . await ?;
1970-
1971- // Keep TCP connection open briefly to allow client to process close
1972- tokio:: time:: sleep ( WEBSOCKET_CLOSE_LINGER ) . await ;
1973- }
1974- Ok ( ResolveRouteOutput :: Target ( _) ) => {
1975- ws_handle
1976- . send ( to_hyper_close ( Some ( err_to_close_frame (
1977- errors:: WebSocketTargetChanged . build ( ) ,
1978- ray_id,
1979- ) ) ) )
1980- . await ?;
1954+ // Retry route resolution
1955+ match state
1956+ . resolve_route (
1957+ & req_host,
1958+ & req_path,
1959+ & req_method,
1960+ state. port_type . clone ( ) ,
1961+ & req_headers,
1962+ true ,
1963+ )
1964+ . await
1965+ {
1966+ Ok ( ResolveRouteOutput :: CustomServe ( new_handler) ) => {
1967+ handler = new_handler;
1968+ continue ;
1969+ }
1970+ Ok ( ResolveRouteOutput :: Response ( response) ) => {
1971+ ws_handle
1972+ . send ( to_hyper_close ( Some ( str_to_close_frame (
1973+ response. message . as_ref ( ) ,
1974+ ) ) ) )
1975+ . await ?;
1976+
1977+ // Flush to ensure close frame is sent
1978+ ws_handle. flush ( ) . await ?;
1979+
1980+ // Keep TCP connection open briefly to allow client to process close
1981+ tokio:: time:: sleep ( WEBSOCKET_CLOSE_LINGER ) . await ;
1982+ }
1983+ Ok ( ResolveRouteOutput :: Target ( _) ) => {
1984+ ws_handle
1985+ . send ( to_hyper_close ( Some ( err_to_close_frame (
1986+ errors:: WebSocketTargetChanged . build ( ) ,
1987+ ray_id,
1988+ ) ) ) )
1989+ . await ?;
19811990
1982- // Flush to ensure close frame is sent
1983- ws_handle. flush ( ) . await ?;
1991+ // Flush to ensure close frame is sent
1992+ ws_handle. flush ( ) . await ?;
19841993
1985- // Keep TCP connection open briefly to allow client to process close
1986- tokio:: time:: sleep ( WEBSOCKET_CLOSE_LINGER ) . await ;
1994+ // Keep TCP connection open briefly to allow client to process close
1995+ tokio:: time:: sleep ( WEBSOCKET_CLOSE_LINGER ) . await ;
19871996
1988- break ;
1989- }
1990- Err ( err) => {
1991- ws_handle
1992- . send ( to_hyper_close ( Some ( err_to_close_frame (
1993- err, ray_id,
1994- ) ) ) )
1995- . await ?;
1997+ break ;
1998+ }
1999+ Err ( err) => {
2000+ ws_handle
2001+ . send ( to_hyper_close ( Some ( err_to_close_frame (
2002+ err, ray_id,
2003+ ) ) ) )
2004+ . await ?;
19962005
1997- // Flush to ensure close frame is sent
1998- ws_handle. flush ( ) . await ?;
2006+ // Flush to ensure close frame is sent
2007+ ws_handle. flush ( ) . await ?;
19992008
2000- // Keep TCP connection open briefly to allow client to process close
2001- tokio:: time:: sleep ( WEBSOCKET_CLOSE_LINGER ) . await ;
2009+ // Keep TCP connection open briefly to allow client to process close
2010+ tokio:: time:: sleep ( WEBSOCKET_CLOSE_LINGER ) . await ;
20022011
2003- break ;
2004- }
2012+ break ;
20052013 }
20062014 }
20072015 }
@@ -2509,9 +2517,9 @@ fn is_retryable_ws_error(err: &anyhow::Error) -> bool {
25092517 }
25102518}
25112519
2512- fn is_ws_retry ( err : & anyhow:: Error ) -> bool {
2520+ fn is_ws_hibernate ( err : & anyhow:: Error ) -> bool {
25132521 if let Some ( rivet_err) = err. chain ( ) . find_map ( |x| x. downcast_ref :: < RivetError > ( ) ) {
2514- rivet_err. group ( ) == "guard" && rivet_err. code ( ) == "websocket_service_retry "
2522+ rivet_err. group ( ) == "guard" && rivet_err. code ( ) == "websocket_service_hibernate "
25152523 } else {
25162524 false
25172525 }
0 commit comments