5252 DocumentTooLarge ,
5353 ExecutionTimeout ,
5454 InvalidOperation ,
55+ NetworkTimeout ,
5556 NotPrimaryError ,
5657 OperationFailure ,
5758 PyMongoError ,
@@ -723,6 +724,7 @@ class PoolState:
723724 PAUSED = 1
724725 READY = 2
725726 CLOSED = 3
727+ BACKOFF = 4
726728
727729
728730# Do *not* explicitly inherit from object or Jython won't call __del__
@@ -791,6 +793,7 @@ def __init__(
791793 self ._pending = 0
792794 self ._client_id = client_id
793795 self ._backoff = 0
796+ self ._backoff_connection_time = - 1
794797 if self .enabled_for_cmap :
795798 assert self .opts ._event_listeners is not None
796799 self .opts ._event_listeners .publish_pool_created (
@@ -817,6 +820,9 @@ def __init__(
817820 async def ready (self ) -> None :
818821 # Take the lock to avoid the race condition described in PYTHON-2699.
819822 async with self .lock :
823+ # Do not set the pool as ready if in backoff.
824+ if self ._backoff :
825+ return
820826 if self .state != PoolState .READY :
821827 self .state = PoolState .READY
822828 if self .enabled_for_cmap :
@@ -846,7 +852,7 @@ async def _reset(
846852 async with self .size_cond :
847853 if self .closed :
848854 return
849- # Clear the backoff state .
855+ # Clear the backoff amount .
850856 self ._backoff = 0
851857 if self .opts .pause_enabled and pause and not self .opts .load_balanced :
852858 old_state , self .state = self .state , PoolState .PAUSED
@@ -1029,26 +1035,34 @@ async def remove_stale_sockets(self, reference_generation: int) -> None:
10291035 self .requests -= 1
10301036 self .size_cond .notify ()
10311037
1032- def _handle_connection_error (self , error : BaseException , phase : str , conn_id : int ) -> None :
1038+ def _handle_connection_error (self , error : BaseException , phase : str ) -> None :
10331039 # Handle system overload condition for non-sdam pools.
1034- # Look for an AutoReconnect error raised from a ConnectionResetError with
1035- # errno == errno.ECONNRESET or raised from an OSError that we've created due to
1036- # a closed connection.
1040+ # Look for an AutoReconnect or NetworkTimeout error.
10371041 # If found, set backoff and add error labels.
1038- if self .is_sdam or type (error ) != AutoReconnect :
1042+ if self .is_sdam or type (error ) not in ( AutoReconnect , NetworkTimeout ) :
10391043 return
1040- self ._backoff += 1
10411044 error ._add_error_label ("SystemOverloadedError" )
10421045 error ._add_error_label ("RetryableError" )
1046+ self .backoff ()
1047+
1048+ def backoff (self ):
1049+ """Set/increase backoff mode."""
1050+ self ._backoff += 1
1051+ if self .state != PoolState .BACKOFF :
1052+ self .state = PoolState .BACKOFF
1053+ if self .enabled_for_cmap :
1054+ assert self .opts ._event_listeners is not None
1055+ self .opts ._event_listeners .publish_pool_backoff (self .address , self ._backoff )
1056+ self ._backoff_connection_time = _backoff (self ._backoff ) + time .monotonic ()
1057+
10431058 # Log the pool backoff message.
10441059 if self .enabled_for_logging and _CONNECTION_LOGGER .isEnabledFor (logging .DEBUG ):
10451060 _debug_log (
10461061 _CONNECTION_LOGGER ,
1047- message = _ConnectionStatusMessage .POOL_BACKOFF ,
1062+ message = _ConnectionStatusMessage .POOL_BACKOFF % self . _backoff ,
10481063 clientId = self ._client_id ,
10491064 serverHost = self .address [0 ],
10501065 serverPort = self .address [1 ],
1051- driverConnectionId = conn_id ,
10521066 reason = _verbose_connection_error_reason (ConnectionClosedReason .POOL_BACKOFF ),
10531067 error = ConnectionClosedReason .POOL_BACKOFF ,
10541068 )
@@ -1082,10 +1096,6 @@ async def connect(self, handler: Optional[_MongoClientErrorHandler] = None) -> A
10821096 driverConnectionId = conn_id ,
10831097 )
10841098
1085- # Apply backoff if applicable.
1086- if self ._backoff :
1087- await asyncio .sleep (_backoff (self ._backoff ))
1088-
10891099 # Pass a context to determine if we successfully create a configured socket.
10901100 context = dict (has_created_socket = False )
10911101
@@ -1114,7 +1124,7 @@ async def connect(self, handler: Optional[_MongoClientErrorHandler] = None) -> A
11141124 error = ConnectionClosedReason .ERROR ,
11151125 )
11161126 if context ["has_created_socket" ]:
1117- self ._handle_connection_error (error , "handshake" , conn_id )
1127+ self ._handle_connection_error (error , "handshake" )
11181128 if isinstance (error , (IOError , OSError , * SSLErrors )):
11191129 details = _get_timeout_details (self .opts )
11201130 _raise_connection_failure (self .address , error , timeout_details = details )
@@ -1138,15 +1148,18 @@ async def connect(self, handler: Optional[_MongoClientErrorHandler] = None) -> A
11381148 except BaseException as e :
11391149 async with self .lock :
11401150 self .active_contexts .discard (conn .cancel_context )
1141- self ._handle_connection_error (e , "hello" , conn_id )
1151+ self ._handle_connection_error (e , "hello" )
11421152 await conn .close_conn (ConnectionClosedReason .ERROR )
11431153 raise
11441154
11451155 if handler :
11461156 await handler .client ._topology .receive_cluster_time (conn ._cluster_time )
11471157
11481158 # Clear the backoff state.
1149- self ._backoff = 0
1159+ if self ._backoff :
1160+ self ._backoff = 0
1161+ await self .ready ()
1162+
11501163 return conn
11511164
11521165 @contextlib .asynccontextmanager
@@ -1342,6 +1355,9 @@ async def _get_conn(
13421355 if await self ._perished (conn ):
13431356 conn = None
13441357 continue
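1358+ # See if we need to wait for the backoff period. NOTE(review): _backoff_connection_time is set to a future deadline (time.monotonic() + backoff delay), so the wait should apply while it is still > time.monotonic(); the "<" below looks inverted - confirm.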
1359+ elif self ._backoff and (self ._backoff_connection_time < time .monotonic ()):
1360+ continue
13451361 else : # We need to create a new connection
13461362 try :
13471363 conn = await self .connect (handler = handler )
0 commit comments