55package filewatcher
66
77import (
8+ "errors"
89 "io/fs"
910 "log/slog"
1011 "os"
@@ -17,22 +18,33 @@ import (
1718 "golang.org/x/tools/gopls/internal/protocol"
1819)
1920
21+ // ErrClosed is used when trying to operate on a closed Watcher.
22+ var ErrClosed = errors .New ("file watcher: watcher already closed" )
23+
2024// Watcher collects events from a [fsnotify.Watcher] and converts them into
2125// batched LSP [protocol.FileEvent]s.
2226type Watcher struct {
2327 logger * slog.Logger
2428
2529 stop chan struct {} // closed by Close to terminate run loop
2630
27- wg sync.WaitGroup // counts number of active run goroutines (max 1)
31+ // errs is an internal channel for surfacing errors from the file watcher,
32+ // distinct from the fsnotify watcher's error channel.
33+ errs chan error
34+
35+ // watchers counts the number of active watch registration goroutines,
36+ // including their error handling.
37+ watchers sync.WaitGroup
38+ runners sync.WaitGroup // counts the number of active run goroutines (max 1)
2839
2940 watcher * fsnotify.Watcher
3041
3142 mu sync.Mutex // guards all fields below
3243
33- // knownDirs tracks all known directories to help distinguish between file
34- // and directory deletion events.
35- knownDirs map [string ]bool
44+ // dirCancel maps a directory path to its cancellation channel.
45+ // A nil map indicates the watcher is closing and prevents new directory
46+ // watch registrations.
47+ dirCancel map [string ]chan struct {}
3648
3749 // events is the current batch of unsent file events, which will be sent
3850 // when the timer expires.
@@ -53,11 +65,12 @@ func New(delay time.Duration, logger *slog.Logger, handler func([]protocol.FileE
5365 w := & Watcher {
5466 logger : logger ,
5567 watcher : watcher ,
56- knownDirs : make (map [string ]bool ),
68+ dirCancel : make (map [string ]chan struct {}),
69+ errs : make (chan error ),
5770 stop : make (chan struct {}),
5871 }
5972
60- w .wg .Add (1 )
73+ w .runners .Add (1 )
6174 go w .run (delay , handler )
6275
6376 return w , nil
@@ -66,7 +79,7 @@ func New(delay time.Duration, logger *slog.Logger, handler func([]protocol.FileE
6679// run is the main event-handling loop for the watcher. It should be run in a
6780// separate goroutine.
6881func (w * Watcher ) run (delay time.Duration , handler func ([]protocol.FileEvent , error )) {
69- defer w .wg .Done ()
82+ defer w .runners .Done ()
7083
7184 // timer is used to debounce events.
7285 timer := time .NewTimer (delay )
@@ -78,13 +91,23 @@ func (w *Watcher) run(delay time.Duration, handler func([]protocol.FileEvent, er
7891 return
7992
8093 case <- timer .C :
81- w .sendEvents (handler )
94+ if events := w .drainEvents (); len (events ) > 0 {
95+ handler (events , nil )
96+ }
8297 timer .Reset (delay )
8398
8499 case err , ok := <- w .watcher .Errors :
85100 // When the watcher is closed, its Errors channel is closed, which
86101 // unblocks this case. We continue to the next loop iteration,
87- // allowing the <-w.closed case to handle the shutdown.
102+ // allowing the <-w.stop case to handle the shutdown.
103+ if ! ok {
104+ continue
105+ }
106+ if err != nil {
107+ handler (nil , err )
108+ }
109+
110+ case err , ok := <- w .errs :
88111 if ! ok {
89112 continue
90113 }
@@ -96,10 +119,9 @@ func (w *Watcher) run(delay time.Duration, handler func([]protocol.FileEvent, er
96119 if ! ok {
97120 continue
98121 }
99- // file watcher should not handle the fsnotify.Event concurrently,
100- // the original order should be preserved. E.g. if a file get
101- // deleted and recreated, running concurrently may result it in
102- // reverse order.
122+ // fsnotify.Event should not be handled concurrently, to preserve their
123+ // original order. For example, if a file is deleted and recreated,
124+ // concurrent handling could process the events in reverse order.
103125 //
104126 // Only reset the timer if a relevant event happened.
105127 // https://github.com/fsnotify/fsnotify?tab=readme-ov-file#why-do-i-get-many-chmod-events
@@ -134,10 +156,17 @@ func (w *Watcher) WatchDir(path string) error {
134156 if skipDir (d .Name ()) {
135157 return filepath .SkipDir
136158 }
137- w .addKnownDir (path )
138- if err := w .watchDir (path ); err != nil {
139- // TODO(hxjiang): retry on watch failures.
140- return filepath .SkipDir
159+
160+ done := w .addWatchHandle (path )
161+ if done == nil { // file watcher closing
162+ return filepath .SkipAll
163+ }
164+
165+ errChan := make (chan error , 1 )
166+ w .watchDir (path , done , errChan )
167+
168+ if err := <- errChan ; err != nil {
169+ return err
141170 }
142171 }
143172 return nil
@@ -159,8 +188,8 @@ func (w *Watcher) handleEvent(event fsnotify.Event) *protocol.FileEvent {
159188 } else if os .IsNotExist (err ) {
160189 // Upon deletion, the file/dir has been removed. fsnotify
161190 // does not provide information regarding the deleted item.
162- // Use the watchedDirs to determine whether it's a dir .
163- isDir = w .isKnownDir (path )
191+ // Use watchHandles to determine if the deleted item was a directory .
192+ isDir = w .isWatchedDir (path )
164193 } else {
165194 // If statting failed, something is wrong with the file system.
166195 // Log and move on.
@@ -184,26 +213,25 @@ func (w *Watcher) handleEvent(event fsnotify.Event) *protocol.FileEvent {
184213 fallthrough
185214 case event .Op .Has (fsnotify .Remove ):
186215 // Upon removal, we only need to remove the entries from the map.
187- // The [fsnotify.Watcher] remove the watch for us.
216+ // The [fsnotify.Watcher] removes the watch for us.
188217 // fsnotify/fsnotify#268
189- w .removeKnownDir (path )
218+ w .removeWatchHandle (path )
190219
191220 // TODO(hxjiang): Directory removal events from some LSP clients may
192221 // not include corresponding removal events for child files and
193- // subdirectories. Should we do some filtering when add the dir
222+ // subdirectories. Should we do some filtering when adding the dir
194223 // deletion event to the events slice.
195224 return & protocol.FileEvent {
196225 URI : protocol .URIFromPath (path ),
197226 Type : protocol .Deleted ,
198227 }
199228 case event .Op .Has (fsnotify .Create ):
200- w .addKnownDir (path )
201-
202- // This watch is added asynchronously to prevent a potential deadlock
203- // on Windows. The fsnotify library can block when registering a watch
204- // if its event channel is full (see fsnotify/fsnotify#502).
205- // TODO(hxjiang): retry on watch failure.
206- go w .watchDir (path )
229+ // This watch is added asynchronously to prevent a potential
230+ // deadlock on Windows. See fsnotify/fsnotify#502.
231+ // Error encountered will be sent to internal error channel.
232+ if done := w .addWatchHandle (path ); done != nil {
233+ go w .watchDir (path , done , w .errs )
234+ }
207235
208236 return & protocol.FileEvent {
209237 URI : protocol .URIFromPath (path ),
@@ -244,10 +272,19 @@ func (w *Watcher) handleEvent(event fsnotify.Event) *protocol.FileEvent {
244272 }
245273}
246274
247- // watchDir register the watch for the input dir. This function may be blocking
248- // because of the issue fsnotify/fsnotify#502.
249- func (w * Watcher ) watchDir (path string ) error {
250- // Dir with broken symbolic link can not be watched.
275+ // watchDir registers a watch for a directory, retrying with backoff if it fails.
276+ // It can be canceled by calling removeWatchHandle. On success or cancellation,
277+ // nil is sent to 'errChan'; otherwise, the last error after all retries is sent.
278+ func (w * Watcher ) watchDir (path string , done chan struct {}, errChan chan error ) {
279+ if errChan == nil {
280+ panic ("input error chan is nil" )
281+ }
282+
283+ // On darwin, watching a directory will fail if it contains broken symbolic
284+ // links. This state can occur temporarily during operations like a git
285+ // branch switch. To handle this, we retry multiple times with exponential
286+ // backoff, allowing time for the symbolic link's target to be created.
287+
251288 // TODO(hxjiang): Address a race condition where file or directory creations
252289 // under current directory might be missed between the current directory
253290 // creation and the establishment of the file watch.
@@ -256,26 +293,89 @@ func (w *Watcher) watchDir(path string) error {
256293 // 1. Retrospectively check for and trigger creation events for any new
257294 // files/directories.
258295 // 2. Recursively add watches for any newly created subdirectories.
259- return w .watcher .Add (path )
296+ var (
297+ delay = 500 * time .Millisecond
298+ err error
299+ )
300+
301+ // Watchers wait group becomes done only after errChan send.
302+ w .watchers .Add (1 )
303+ defer func () {
304+ errChan <- err
305+ w .watchers .Done ()
306+ }()
307+
308+ for i := range 5 {
309+ if i > 0 {
310+ select {
311+ case <- time .After (delay ):
312+ delay *= 2
313+ case <- done :
314+ return // cancelled
315+ }
316+ }
317+ // This function may block due to fsnotify/fsnotify#502.
318+ err := w .watcher .Add (path )
319+ if afterAddHook != nil {
320+ afterAddHook (path , err )
321+ }
322+ if err == nil {
323+ return
324+ }
325+ }
260326}
261327
262- func (w * Watcher ) addKnownDir (path string ) {
328+ var afterAddHook func (path string , err error )
329+
330+ // addWatchHandle registers a new directory watch.
331+ // The returned 'done' channel channel should be used to signal cancellation of
332+ // a pending watch.
333+ // It returns nil if the watcher is already closing.
334+ func (w * Watcher ) addWatchHandle (path string ) chan struct {} {
263335 w .mu .Lock ()
264336 defer w .mu .Unlock ()
265- w .knownDirs [path ] = true
337+
338+ if w .dirCancel == nil { // file watcher is closing.
339+ return nil
340+ }
341+
342+ done := make (chan struct {})
343+ w .dirCancel [path ] = done
344+ return done
266345}
267346
268- func (w * Watcher ) removeKnownDir (path string ) {
347+ // removeWatchHandle removes the handle for a directory watch and cancels any
348+ // pending watch attempt for that path.
349+ func (w * Watcher ) removeWatchHandle (path string ) {
269350 w .mu .Lock ()
270351 defer w .mu .Unlock ()
271- delete (w .knownDirs , path )
352+
353+ if done , ok := w .dirCancel [path ]; ok {
354+ delete (w .dirCancel , path )
355+ close (done )
356+ }
357+ }
358+
359+ // close removes all handles and cancels all pending watch attempt for that path
360+ // and set dirCancel to nil which prevent any future watch attempts.
361+ func (w * Watcher ) close () {
362+ w .mu .Lock ()
363+ dirCancel := w .dirCancel
364+ w .dirCancel = nil
365+ w .mu .Unlock ()
366+
367+ for _ , ch := range dirCancel {
368+ close (ch )
369+ }
272370}
273371
274- func (w * Watcher ) isKnownDir (path string ) bool {
372+ // isWatchedDir reports whether the given path has a watch handle, meaning it is
373+ // a directory the watcher is managing.
374+ func (w * Watcher ) isWatchedDir (path string ) bool {
275375 w .mu .Lock ()
276376 defer w .mu .Unlock ()
277377
278- _ , isDir := w .knownDirs [path ]
378+ _ , isDir := w .dirCancel [path ]
279379 return isDir
280380}
281381
@@ -297,27 +397,35 @@ func (w *Watcher) addEvent(event protocol.FileEvent) {
297397 }
298398}
299399
300- func (w * Watcher ) sendEvents ( handler func ( []protocol.FileEvent , error )) {
400+ func (w * Watcher ) drainEvents () []protocol.FileEvent {
301401 w .mu .Lock ()
302402 events := w .events
303403 w .events = nil
304404 w .mu .Unlock ()
305405
306- if len (events ) != 0 {
307- handler (events , nil )
308- }
406+ return events
309407}
310408
311409// Close shuts down the watcher, waits for the internal goroutine to terminate,
312410// and returns any final error.
313411func (w * Watcher ) Close () error {
412+ // Cancel any ongoing watch registration.
413+ w .close ()
414+
415+ // Wait for all watch registration goroutines to finish, including their
416+ // error handling. This ensures that:
417+ // - All [Watcher.watchDir] goroutines have exited and sent their errors, so
418+ // it is safe to close the internal error channel.
419+ // - There are no ongoing [fsnotify.Watcher.Add] calls, so it is safe to
420+ // close the fsnotify watcher (see fsnotify/fsnotify#704).
421+ w .watchers .Wait ()
422+ close (w .errs )
423+
314424 err := w .watcher .Close ()
315425
316- // Wait for the go routine to finish. So all the channels will be closed and
317- // all go routine will be terminated.
426+ // Wait for the main run loop to terminate.
318427 close (w .stop )
319-
320- w .wg .Wait ()
428+ w .runners .Wait ()
321429
322430 return err
323431}
0 commit comments