Skip to content

Commit 8b72697

Browse files
authored
Update ReadZipFS.openbin handler to use native zipfile code (#527)
* Remove compatibility code for making `_ZipExtFile` seekable starting from Python 3.7
* Mark `ReadZipFS` as a case-sensitive filesystem
* Update `CHANGELOG.md` with changes from #527
1 parent f253d9f commit 8b72697

File tree

2 files changed

+124
-66
lines changed

2 files changed

+124
-66
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
1212

1313
- Replaced `appdirs` with `platformdirs` dependency
1414
([#489](https://github.com/PyFilesystem/pyfilesystem2/pull/489)).
15+
- Make `fs.zipfs._ZipExtFile` use the seeking mechanism implemented
16+
in the Python standard library in Python version 3.7 and later
17+
([#527](https://github.com/PyFilesystem/pyfilesystem2/pull/527)).
18+
- Mark `fs.zipfs.ReadZipFS` as a case-sensitive filesystem
19+
([#527](https://github.com/PyFilesystem/pyfilesystem2/pull/527)).
1520

1621

1722
## [2.4.15] - 2022-02-07

fs/zipfs.py

Lines changed: 119 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from __future__ import print_function
55
from __future__ import unicode_literals
66

7+
import sys
78
import typing
89
import zipfile
910
from datetime import datetime
@@ -51,74 +52,126 @@ def __init__(self, fs, name): # noqa: D107
5152
self._pos = 0
5253
super(_ZipExtFile, self).__init__(_zip.open(name), "r", name)
5354

54-
def read(self, size=-1):
55-
# type: (int) -> bytes
56-
buf = self._f.read(-1 if size is None else size)
57-
self._pos += len(buf)
58-
return buf
59-
60-
def read1(self, size=-1):
61-
# type: (int) -> bytes
62-
buf = self._f.read1(-1 if size is None else size) # type: ignore
63-
self._pos += len(buf)
64-
return buf
65-
66-
def seek(self, offset, whence=Seek.set):
67-
# type: (int, SupportsInt) -> int
68-
"""Change stream position.
69-
70-
Change the stream position to the given byte offset. The
71-
offset is interpreted relative to the position indicated by
72-
``whence``.
73-
74-
Arguments:
75-
offset (int): the offset to the new position, in bytes.
76-
whence (int): the position reference. Possible values are:
77-
* `Seek.set`: start of stream (the default).
78-
* `Seek.current`: current position; offset may be negative.
79-
* `Seek.end`: end of stream; offset must be negative.
80-
81-
Returns:
82-
int: the new absolute position.
83-
84-
Raises:
85-
ValueError: when ``whence`` is not known, or ``offset``
86-
is invalid.
87-
88-
Note:
89-
Zip compression does not support seeking, so the seeking
90-
is emulated. Seeking somewhere else than the current position
91-
will need to either:
92-
* reopen the file and restart decompression
93-
* read and discard data to advance in the file
94-
95-
"""
96-
_whence = int(whence)
97-
if _whence == Seek.current:
98-
offset += self._pos
99-
if _whence == Seek.current or _whence == Seek.set:
100-
if offset < 0:
101-
raise ValueError("Negative seek position {}".format(offset))
102-
elif _whence == Seek.end:
103-
if offset > 0:
104-
raise ValueError("Positive seek position {}".format(offset))
105-
offset += self._end
106-
else:
107-
raise ValueError(
108-
"Invalid whence ({}, should be {}, {} or {})".format(
109-
_whence, Seek.set, Seek.current, Seek.end
55+
# NOTE(@althonos): Starting from Python 3.7, files inside a Zip archive are
56+
# seekable provided they were opened from a seekable file
57+
# handle. Before that, we can emulate a seek using the
58+
# read method, although it adds a ton of overhead and is
59+
# way less efficient than extracting once to a BytesIO.
60+
if sys.version_info < (3, 7):
61+
62+
def read(self, size=-1):
63+
# type: (int) -> bytes
64+
buf = self._f.read(-1 if size is None else size)
65+
self._pos += len(buf)
66+
return buf
67+
68+
def read1(self, size=-1):
69+
# type: (int) -> bytes
70+
buf = self._f.read1(-1 if size is None else size) # type: ignore
71+
self._pos += len(buf)
72+
return buf
73+
74+
def tell(self):
75+
# type: () -> int
76+
return self._pos
77+
78+
def seekable(self):
79+
return True
80+
81+
def seek(self, offset, whence=Seek.set):
82+
# type: (int, SupportsInt) -> int
83+
"""Change stream position.
84+
85+
Change the stream position to the given byte offset. The
86+
offset is interpreted relative to the position indicated by
87+
``whence``.
88+
89+
Arguments:
90+
offset (int): the offset to the new position, in bytes.
91+
whence (int): the position reference. Possible values are:
92+
* `Seek.set`: start of stream (the default).
93+
* `Seek.current`: current position; offset may be negative.
94+
* `Seek.end`: end of stream; offset must be negative.
95+
96+
Returns:
97+
int: the new absolute position.
98+
99+
Raises:
100+
ValueError: when ``whence`` is not known, or ``offset``
101+
is invalid.
102+
103+
Note:
104+
Zip compression does not support seeking, so the seeking
105+
is emulated. Seeking somewhere else than the current position
106+
will need to either:
107+
* reopen the file and restart decompression
108+
* read and discard data to advance in the file
109+
110+
"""
111+
_whence = int(whence)
112+
if _whence == Seek.current:
113+
offset += self._pos
114+
if _whence == Seek.current or _whence == Seek.set:
115+
if offset < 0:
116+
raise ValueError("Negative seek position {}".format(offset))
117+
elif _whence == Seek.end:
118+
if offset > 0:
119+
raise ValueError("Positive seek position {}".format(offset))
120+
offset += self._end
121+
else:
122+
raise ValueError(
123+
"Invalid whence ({}, should be {}, {} or {})".format(
124+
_whence, Seek.set, Seek.current, Seek.end
125+
)
110126
)
111-
)
112127

113-
if offset < self._pos:
114-
self._f = self._zip.open(self.name) # type: ignore
115-
self._pos = 0
116-
self.read(offset - self._pos)
117-
return self._pos
128+
if offset < self._pos:
129+
self._f = self._zip.open(self.name) # type: ignore
130+
self._pos = 0
131+
self.read(offset - self._pos)
132+
return self._pos
133+
134+
else:
135+
136+
def seek(self, offset, whence=Seek.set):
137+
# type: (int, SupportsInt) -> int
138+
"""Change stream position.
139+
140+
Change the stream position to the given byte offset. The
141+
offset is interpreted relative to the position indicated by
142+
``whence``.
143+
144+
Arguments:
145+
offset (int): the offset to the new position, in bytes.
146+
whence (int): the position reference. Possible values are:
147+
* `Seek.set`: start of stream (the default).
148+
* `Seek.current`: current position; offset may be negative.
149+
* `Seek.end`: end of stream; offset must be negative.
150+
151+
Returns:
152+
int: the new absolute position.
153+
154+
Raises:
155+
ValueError: when ``whence`` is not known, or ``offset``
156+
is invalid.
157+
158+
"""
159+
_whence = int(whence)
160+
_pos = self.tell()
161+
if _whence == Seek.current or _whence == Seek.set:
162+
if _pos + offset < 0:
163+
raise ValueError("Negative seek position {}".format(offset))
164+
elif _whence == Seek.end:
165+
if _pos + offset > 0:
166+
raise ValueError("Positive seek position {}".format(offset))
167+
else:
168+
raise ValueError(
169+
"Invalid whence ({}, should be {}, {} or {})".format(
170+
_whence, Seek.set, Seek.current, Seek.end
171+
)
172+
)
118173

119-
def tell(self):
120-
# type: () -> int
121-
return self._pos
174+
return self._f.seek(offset, _whence)
122175

123176

124177
class ZipFS(WrapFS):
@@ -279,7 +332,7 @@ class ReadZipFS(FS):
279332
"""A readable zip file."""
280333

281334
_meta = {
282-
"case_insensitive": True,
335+
"case_insensitive": False,
283336
"network": False,
284337
"read_only": True,
285338
"supports_rename": False,

0 commit comments

Comments
 (0)