Skip to content

Commit 2c4b435

Browse files
committed
Add an option to stop polling on persistent errors
1 parent b50d076 commit 2c4b435

File tree

2 files changed

+50
-5
lines changed

2 files changed

+50
-5
lines changed

projects/jupyter-server-ydoc/jupyter_server_ydoc/app.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,14 @@ class YDocExtension(ExtensionApp):
5050
saving changes from the front-end.""",
5151
)
5252

53+
file_stop_poll_on_errors_after = Float(
54+
7 * 24 * 60 * 60,
55+
allow_none=True,
56+
config=True,
57+
help="""The duration in seconds to stop polling a file after consecutive errors.
58+
Defaults to 7 days, if None then polling will not stop on errors.""",
59+
)
60+
5361
document_cleanup_delay = Float(
5462
60,
5563
allow_none=True,
@@ -121,7 +129,10 @@ def initialize_handlers(self):
121129
# the global app settings in which the file id manager will register
122130
# itself maybe at a later time.
123131
self.file_loaders = FileLoaderMapping(
124-
self.serverapp.web_app.settings, self.log, self.file_poll_interval
132+
self.serverapp.web_app.settings,
133+
self.log,
134+
self.file_poll_interval,
135+
file_stop_poll_on_errors_after=self.file_stop_poll_on_errors_after,
125136
)
126137

127138
self.handlers.extend(

projects/jupyter-server-ydoc/jupyter_server_ydoc/loaders.py

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@
55

66
import asyncio
77
from logging import Logger, getLogger
8+
from time import time
89
from typing import Any, Callable, Coroutine
10+
from tornado.web import HTTPError
11+
from http import HTTPStatus
912

1013
from jupyter_server.services.contents.manager import (
1114
AsyncContentsManager,
@@ -29,12 +32,16 @@ def __init__(
2932
contents_manager: AsyncContentsManager | ContentsManager,
3033
log: Logger | None = None,
3134
poll_interval: float | None = None,
35+
max_consecutive_logs: int = 3,
36+
stop_poll_on_errors_after: float | None = None,
3237
) -> None:
3338
self._file_id: str = file_id
3439

3540
self._lock = asyncio.Lock()
3641
self._poll_interval = poll_interval
42+
self._stop_poll_on_errors_after = stop_poll_on_errors_after
3743
self._file_id_manager = file_id_manager
44+
self._max_consecutive_logs = max_consecutive_logs
3845
self._contents_manager = contents_manager
3946

4047
self._log = log or getLogger(__name__)
@@ -204,8 +211,8 @@ async def _watch_file(self) -> None:
204211
return
205212

206213
consecutive_error_logs = 0
207-
max_consecutive_logs = 3
208214
suppression_logged = False
215+
consecutive_errors_started = None
209216

210217
while True:
211218
try:
@@ -214,13 +221,37 @@ async def _watch_file(self) -> None:
214221
await self.maybe_notify()
215222
consecutive_error_logs = 0
216223
suppression_logged = False
224+
consecutive_errors_started = None
217225
except Exception as e:
218-
if consecutive_error_logs < max_consecutive_logs:
219-
self._log.error(f"Error watching file: {self.path}\n{e!r}", exc_info=e)
226+
# We do not want to terminate the watcher if the content manager request
227+
# fails due to timeout, server error or similar temporary issue; we only
228+
# terminate if the file is not found or we get unauthorized error for
229+
# an extended period of time.
230+
if isinstance(e, HTTPError) and e.status_code in {
231+
HTTPStatus.NOT_FOUND,
232+
HTTPStatus.UNAUTHORIZED,
233+
}:
234+
if (
235+
consecutive_errors_started
236+
and self._stop_poll_on_errors_after is not None
237+
):
238+
errors_duration = time() - consecutive_errors_started
239+
if errors_duration > self._stop_poll_on_errors_after:
240+
self._log.warning(
241+
"Stopping watching file due to consecutive errors over %s seconds: %s",
242+
self._stop_poll_on_errors_after,
243+
self.path,
244+
)
245+
break
246+
else:
247+
consecutive_errors_started = time()
248+
# Otherwise we just log the error
249+
if consecutive_error_logs < self._max_consecutive_logs:
250+
self._log.error("Error watching file %s: %s", self.path, e, exc_info=e)
220251
consecutive_error_logs += 1
221252
elif not suppression_logged:
222253
self._log.warning(
223-
"Too many errors while watching %s suppressing further logs.",
254+
"Too many errors while watching %s - suppressing further logs.",
224255
self.path,
225256
)
226257
suppression_logged = True
@@ -268,6 +299,7 @@ def __init__(
268299
settings: dict,
269300
log: Logger | None = None,
270301
file_poll_interval: float | None = None,
302+
file_stop_poll_on_errors_after: float | None = None,
271303
) -> None:
272304
"""
273305
Args:
@@ -279,6 +311,7 @@ def __init__(
279311
self.__dict: dict[str, FileLoader] = {}
280312
self.log = log or getLogger(__name__)
281313
self.file_poll_interval = file_poll_interval
314+
self._stop_poll_on_errors_after = file_stop_poll_on_errors_after
282315

283316
@property
284317
def contents_manager(self) -> AsyncContentsManager | ContentsManager:
@@ -309,6 +342,7 @@ def __getitem__(self, file_id: str) -> FileLoader:
309342
self.contents_manager,
310343
self.log,
311344
self.file_poll_interval,
345+
stop_poll_on_errors_after=self._stop_poll_on_errors_after,
312346
)
313347
self.__dict[file_id] = file
314348

0 commit comments

Comments
 (0)