Skip to content

Commit 39040c2

Browse files
authored
Make ArbitraryFileIdManager filesystem-agnostic and fix Windows CI (#46)
* test: normalize case of path based on FID manager class * Remove use of os.path from ArbitraryFileIdManager * Make ArbitraryFileIdManager filesystem agnostic * Normalize root_dir when looking for relative path * Optimize timestamp checks by selecting columns prior * Fix test path normalization * Return api-based paths from test fixures * Apply appropriate normalization in get_id_nosync utility * Use same path normalization as FileId manager implementations in get_id_nosync * Skip symlink test on Windows Python 3.7 for now * Restore pypy as commented out * Apply review comments * Normalize root_dir when set, remove unnecessary normalize_path in tests
1 parent d6a3c90 commit 39040c2

File tree

3 files changed

+184
-94
lines changed

3 files changed

+184
-94
lines changed

.github/workflows/test-python.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,12 @@ jobs:
1919
matrix:
2020
os:
2121
- ubuntu-latest
22-
# - windows-latest
22+
- windows-latest
2323
- macos-latest
2424
python-version: ["3.7", "3.10"]
2525
include:
26-
# - os: windows-latest
27-
# python-version: "3.9"
26+
- os: windows-latest
27+
python-version: "3.9"
2828
# - os: ubuntu-latest
2929
# python-version: "pypy-3.8"
3030
- os: macos-latest

jupyter_server_fileid/manager.py

Lines changed: 113 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import posixpath
23
import sqlite3
34
import stat
45
import time
@@ -78,63 +79,50 @@ def _validate_db_path(self, proposal):
7879
def _uuid() -> str:
7980
return str(uuid.uuid4())
8081

82+
@abstractmethod
8183
def _normalize_path(self, path: str) -> str:
82-
"""Accepts an API path and returns a filesystem path, i.e. one prefixed
83-
by root_dir and uses os.path.sep."""
84-
# use commonprefix instead of commonpath, since root_dir may not be a
85-
# absolute POSIX path.
86-
if os.path.commonprefix([self.root_dir, path]) != self.root_dir:
87-
path = os.path.join(self.root_dir, path)
88-
89-
return path
84+
"""Accepts an API path and returns a "persistable" path, i.e. one prefixed
85+
by root_dir that can then be persisted in a format relative to the implementation."""
86+
pass
9087

88+
@abstractmethod
9189
def _from_normalized_path(self, path: Optional[str]) -> Optional[str]:
92-
"""Accepts a filesystem path and returns an API path, i.e. one relative
90+
"""Accepts a "persistable" path and returns an API path, i.e. one relative
9391
to root_dir and uses forward slashes as the path separator. Returns
9492
`None` if the given path is None or is not relative to root_dir."""
95-
if path is None:
96-
return None
97-
98-
if os.path.commonprefix([self.root_dir, path]) != self.root_dir:
99-
return None
100-
101-
relpath = os.path.relpath(path, self.root_dir)
102-
# always use forward slashes to delimit children
103-
relpath = relpath.replace(os.path.sep, "/")
104-
105-
return relpath
93+
pass
10694

107-
def _move_recursive(self, old_path: str, new_path: str, sep: str = os.path.sep) -> None:
95+
def _move_recursive(self, old_path: str, new_path: str, path_mgr: Any = os.path) -> None:
10896
"""Move all children of a given directory at `old_path` to a new
10997
directory at `new_path`, delimited by `sep`."""
110-
old_path_glob = old_path + sep + "*"
98+
old_path_glob = old_path + path_mgr.sep + "*"
11199
records = self.con.execute(
112100
"SELECT id, path FROM Files WHERE path GLOB ?", (old_path_glob,)
113101
).fetchall()
114102

115103
for record in records:
116104
id, old_recpath = record
117-
new_recpath = os.path.join(new_path, os.path.relpath(old_recpath, start=old_path))
105+
new_recpath = path_mgr.join(new_path, path_mgr.relpath(old_recpath, start=old_path))
118106
self.con.execute("UPDATE Files SET path = ? WHERE id = ?", (new_recpath, id))
119107

120-
def _copy_recursive(self, from_path: str, to_path: str, sep: str = os.path.sep) -> None:
108+
def _copy_recursive(self, from_path: str, to_path: str, path_mgr: Any = os.path) -> None:
121109
"""Copy all children of a given directory at `from_path` to a new
122110
directory at `to_path`, delimited by `sep`."""
123-
from_path_glob = from_path + sep + "*"
111+
from_path_glob = from_path + path_mgr.sep + "*"
124112
records = self.con.execute(
125113
"SELECT path FROM Files WHERE path GLOB ?", (from_path_glob,)
126114
).fetchall()
127115

128116
for record in records:
129117
(from_recpath,) = record
130-
to_recpath = os.path.join(to_path, os.path.relpath(from_recpath, start=from_path))
118+
to_recpath = path_mgr.join(to_path, path_mgr.relpath(from_recpath, start=from_path))
131119
self.con.execute(
132120
"INSERT INTO Files (id, path) VALUES (?, ?)", (self._uuid(), to_recpath)
133121
)
134122

135-
def _delete_recursive(self, path: str, sep: str = os.path.sep) -> None:
123+
def _delete_recursive(self, path: str, path_mgr: Any = os.path) -> None:
136124
"""Delete all children of a given directory, delimited by `sep`."""
137-
path_glob = path + sep + "*"
125+
path_glob = path + path_mgr.sep + "*"
138126
self.con.execute("DELETE FROM Files WHERE path GLOB ?", (path_glob,))
139127

140128
@abstractmethod
@@ -232,6 +220,15 @@ class ArbitraryFileIdManager(BaseFileIdManager):
232220
Server 2.
233221
"""
234222

223+
@validate("root_dir")
224+
def _validate_root_dir(self, proposal):
225+
# Convert root_dir to an api path, since that's essentially what we persist.
226+
if proposal["value"] is None:
227+
return ""
228+
229+
normalized_content_root = self._normalize_separators(proposal["value"])
230+
return normalized_content_root
231+
235232
def __init__(self, *args, **kwargs):
236233
# pass args and kwargs to parent Configurable
237234
super().__init__(*args, **kwargs)
@@ -254,6 +251,41 @@ def __init__(self, *args, **kwargs):
254251
self.con.execute("CREATE INDEX IF NOT EXISTS ix_Files_path ON Files (path)")
255252
self.con.commit()
256253

254+
@staticmethod
255+
def _normalize_separators(path):
256+
"""Replaces backslashes with forward slashes, removing adjacent slashes."""
257+
258+
parts = path.strip("\\").split("\\")
259+
return "/".join(parts)
260+
261+
def _normalize_path(self, path):
262+
"""Accepts an API path and returns a "persistable" path, i.e. one prefixed
263+
by root_dir that can then be persisted in a format relative to the implementation."""
264+
# use commonprefix instead of commonpath, since root_dir may not be a
265+
# absolute POSIX path.
266+
267+
# norm_root_dir = self._normalize_separators(self.root_dir)
268+
path = self._normalize_separators(path)
269+
if posixpath.commonprefix([self.root_dir, path]) != self.root_dir:
270+
path = posixpath.join(self.root_dir, path)
271+
272+
return path
273+
274+
def _from_normalized_path(self, path: Optional[str]) -> Optional[str]:
275+
"""Accepts a "persistable" path and returns an API path, i.e. one relative
276+
to root_dir and uses forward slashes as the path separator. Returns
277+
`None` if the given path is None or is not relative to root_dir."""
278+
if path is None:
279+
return None
280+
281+
# Convert root_dir to an api path, since that's essentially what we persist.
282+
# norm_root_dir = self._normalize_separators(self.root_dir)
283+
if posixpath.commonprefix([self.root_dir, path]) != self.root_dir:
284+
return None
285+
286+
relpath = posixpath.relpath(path, self.root_dir)
287+
return relpath
288+
257289
def _create(self, path: str) -> str:
258290
path = self._normalize_path(path)
259291
id = self._uuid()
@@ -291,7 +323,7 @@ def move(self, old_path: str, new_path: str) -> None:
291323

292324
if id:
293325
self.con.execute("UPDATE Files SET path = ? WHERE path = ?", (new_path, old_path))
294-
self._move_recursive(old_path, new_path, "/")
326+
self._move_recursive(old_path, new_path, posixpath)
295327
else:
296328
id = self._create(new_path)
297329

@@ -303,7 +335,7 @@ def copy(self, from_path: str, to_path: str) -> Optional[str]:
303335
to_path = self._normalize_path(to_path)
304336

305337
id = self._create(to_path)
306-
self._copy_recursive(from_path, to_path, "/")
338+
self._copy_recursive(from_path, to_path, posixpath)
307339

308340
self.con.commit()
309341
return id
@@ -312,7 +344,7 @@ def delete(self, path: str) -> None:
312344
path = self._normalize_path(path)
313345

314346
self.con.execute("DELETE FROM Files WHERE path = ?", (path,))
315-
self._delete_recursive(path, "/")
347+
self._delete_recursive(path, posixpath)
316348

317349
self.con.commit()
318350

@@ -395,12 +427,32 @@ def __init__(self, *args, **kwargs):
395427
self.con.commit()
396428

397429
def _normalize_path(self, path):
398-
path = super()._normalize_path(path)
430+
"""Accepts an API path and returns a filesystem path, i.e. one prefixed by root_dir."""
431+
if os.path.commonprefix([self.root_dir, path]) != self.root_dir:
432+
path = os.path.join(self.root_dir, path)
433+
399434
path = os.path.normcase(path)
400435
path = os.path.normpath(path)
401-
402436
return path
403437

438+
def _from_normalized_path(self, path: Optional[str]) -> Optional[str]:
439+
"""Accepts a "persisted" filesystem path and returns an API path, i.e.
440+
one relative to root_dir and uses forward slashes as the path separator.
441+
Returns `None` if the given path is None or is not relative to root_dir.
442+
"""
443+
if path is None:
444+
return None
445+
446+
norm_root_dir = os.path.normcase(self.root_dir)
447+
if os.path.commonprefix([norm_root_dir, path]) != norm_root_dir:
448+
return None
449+
450+
relpath = os.path.relpath(path, norm_root_dir)
451+
# always use forward slashes to delimit children
452+
relpath = relpath.replace(os.path.sep, "/")
453+
454+
return relpath
455+
404456
def _index_all(self):
405457
"""Recursively indexes all directories under the server root."""
406458
self._index_dir_recursively(self.root_dir, self._stat(self.root_dir))
@@ -495,19 +547,10 @@ def _sync_dir(self, dir_path):
495547

496548
scan_iter.close()
497549

498-
def _check_timestamps(self, stat_info):
550+
def _check_timestamps(self, stat_info, src_crtime, src_mtime):
499551
"""Returns True if the timestamps of a file match those recorded in the
500552
Files table. Returns False otherwise."""
501553

502-
src = self.con.execute(
503-
"SELECT crtime, mtime FROM Files WHERE ino = ?", (stat_info.ino,)
504-
).fetchone()
505-
506-
# if no record with matching ino, then return None
507-
if not src:
508-
return False
509-
510-
src_crtime, src_mtime = src
511554
src_timestamp = src_crtime if src_crtime is not None else src_mtime
512555
dst_timestamp = stat_info.crtime if stat_info.crtime is not None else stat_info.mtime
513556
return src_timestamp == dst_timestamp
@@ -545,16 +588,16 @@ def _sync_file(self, path, stat_info):
545588
return None
546589

547590
src = self.con.execute(
548-
"SELECT id, path FROM Files WHERE ino = ?", (stat_info.ino,)
591+
"SELECT id, path, crtime, mtime FROM Files WHERE ino = ?", (stat_info.ino,)
549592
).fetchone()
550593

551594
# if ino is not in database, return None
552595
if src is None:
553596
return None
554-
id, old_path = src
597+
id, old_path, crtime, mtime = src
555598

556599
# if timestamps don't match, delete existing record and return None
557-
if not self._check_timestamps(stat_info):
600+
if not self._check_timestamps(stat_info, crtime, mtime):
558601
self.con.execute("DELETE FROM Files WHERE id = ?", (id,))
559602
return None
560603

@@ -699,38 +742,33 @@ def get_path(self, id):
699742
prior to calling `get_path()`.
700743
"""
701744
# optimistic approach: first check to see if path was not yet moved
702-
row = self.con.execute("SELECT path, ino FROM Files WHERE id = ?", (id,)).fetchone()
745+
for retry in [True, False]:
746+
row = self.con.execute(
747+
"SELECT path, ino, crtime, mtime FROM Files WHERE id = ?", (id,)
748+
).fetchone()
703749

704-
# if file ID does not exist, return None
705-
if not row:
706-
return None
707-
708-
path, ino = row
709-
stat_info = self._stat(path)
750+
# if file ID does not exist, return None
751+
if not row:
752+
return None
710753

711-
if stat_info and ino == stat_info.ino and self._check_timestamps(stat_info):
712-
# if file already exists at path and the ino and timestamps match,
713-
# then return the correct path immediately (best case)
714-
return self._from_normalized_path(path)
715-
716-
# otherwise, try again after calling _sync_all() to sync the Files table to the file tree
717-
self._sync_all()
718-
row = self.con.execute("SELECT path, ino FROM Files WHERE id = ?", (id,)).fetchone()
719-
# file ID already guaranteed to exist from previous check
720-
path, ino = row
721-
722-
# if file no longer exists at path, return None
723-
stat_info = self._stat(path)
724-
if stat_info is None:
725-
return None
726-
727-
# if inode numbers or timestamps of the file and record don't match,
728-
# return None
729-
if ino != stat_info.ino or not self._check_timestamps(stat_info):
730-
return None
754+
path, ino, crtime, mtime = row
755+
stat_info = self._stat(path)
731756

732-
# finally, convert the path to a relative one and return it
733-
return self._from_normalized_path(path)
757+
if (
758+
stat_info
759+
and ino == stat_info.ino
760+
and self._check_timestamps(stat_info, crtime, mtime)
761+
):
762+
# if file already exists at path and the ino and timestamps match,
763+
# then return the correct path immediately (best case)
764+
return self._from_normalized_path(path)
765+
766+
# otherwise, try again after calling _sync_all() to sync the Files table to the file tree
767+
if retry:
768+
self._sync_all()
769+
770+
# If we're here, the retry didn't work.
771+
return None
734772

735773
@log(
736774
lambda self, old_path, new_path: f"Updating index following move from {old_path} to {new_path}.",

0 commit comments

Comments
 (0)