|
4 | 4 | from __future__ import print_function |
5 | 5 | from __future__ import unicode_literals |
6 | 6 |
|
| 7 | +import sys |
7 | 8 | import typing |
8 | 9 | import zipfile |
9 | 10 | from datetime import datetime |
@@ -51,74 +52,126 @@ def __init__(self, fs, name): # noqa: D107 |
51 | 52 | self._pos = 0 |
52 | 53 | super(_ZipExtFile, self).__init__(_zip.open(name), "r", name) |
53 | 54 |
|
54 | | - def read(self, size=-1): |
55 | | - # type: (int) -> bytes |
56 | | - buf = self._f.read(-1 if size is None else size) |
57 | | - self._pos += len(buf) |
58 | | - return buf |
59 | | - |
60 | | - def read1(self, size=-1): |
61 | | - # type: (int) -> bytes |
62 | | - buf = self._f.read1(-1 if size is None else size) # type: ignore |
63 | | - self._pos += len(buf) |
64 | | - return buf |
65 | | - |
66 | | - def seek(self, offset, whence=Seek.set): |
67 | | - # type: (int, SupportsInt) -> int |
68 | | - """Change stream position. |
69 | | -
|
70 | | - Change the stream position to the given byte offset. The |
71 | | - offset is interpreted relative to the position indicated by |
72 | | - ``whence``. |
73 | | -
|
74 | | - Arguments: |
75 | | - offset (int): the offset to the new position, in bytes. |
76 | | - whence (int): the position reference. Possible values are: |
77 | | - * `Seek.set`: start of stream (the default). |
78 | | - * `Seek.current`: current position; offset may be negative. |
79 | | - * `Seek.end`: end of stream; offset must be negative. |
80 | | -
|
81 | | - Returns: |
82 | | - int: the new absolute position. |
83 | | -
|
84 | | - Raises: |
85 | | - ValueError: when ``whence`` is not known, or ``offset`` |
86 | | - is invalid. |
87 | | -
|
88 | | - Note: |
89 | | - Zip compression does not support seeking, so the seeking |
90 | | - is emulated. Seeking somewhere else than the current position |
91 | | - will need to either: |
92 | | - * reopen the file and restart decompression |
93 | | - * read and discard data to advance in the file |
94 | | -
|
95 | | - """ |
96 | | - _whence = int(whence) |
97 | | - if _whence == Seek.current: |
98 | | - offset += self._pos |
99 | | - if _whence == Seek.current or _whence == Seek.set: |
100 | | - if offset < 0: |
101 | | - raise ValueError("Negative seek position {}".format(offset)) |
102 | | - elif _whence == Seek.end: |
103 | | - if offset > 0: |
104 | | - raise ValueError("Positive seek position {}".format(offset)) |
105 | | - offset += self._end |
106 | | - else: |
107 | | - raise ValueError( |
108 | | - "Invalid whence ({}, should be {}, {} or {})".format( |
109 | | - _whence, Seek.set, Seek.current, Seek.end |
| 55 | + # NOTE(@althonos): Starting from Python 3.7, files inside a Zip archive are |
| 56 | + # seekable provided they were opened from a seekable file |
| 57 | + # handle. Before that, we can emulate a seek using the |
| 58 | + # read method, although it adds a ton of overhead and is |
| 59 | + # way less efficient than extracting once to a BytesIO. |
| 60 | + if sys.version_info < (3, 7): |
| 61 | + |
def read(self, size=-1):
    # type: (int) -> bytes
    """Read from the wrapped stream and advance the tracked position.

    Arguments:
        size (int): number of bytes requested; ``None`` is forwarded
            to the underlying stream as ``-1``.

    Returns:
        bytes: the data returned by the underlying stream.

    """
    if size is None:
        size = -1
    data = self._f.read(size)
    # Keep the emulated position in sync with what was actually consumed.
    self._pos = self._pos + len(data)
    return data
| 67 | + |
def read1(self, size=-1):
    # type: (int) -> bytes
    """Call ``read1`` on the wrapped stream and advance the tracked position.

    Arguments:
        size (int): number of bytes requested; ``None`` is forwarded
            to the underlying stream as ``-1``.

    Returns:
        bytes: the data returned by the underlying stream.

    """
    if size is None:
        size = -1
    data = self._f.read1(size)  # type: ignore
    # Keep the emulated position in sync with what was actually consumed.
    self._pos = self._pos + len(data)
    return data
| 73 | + |
def tell(self):
    # type: () -> int
    """Return the position tracked in ``self._pos``."""
    position = self._pos
    return position
| 77 | + |
def seekable(self):
    # type: () -> bool
    """Report the stream as seekable; this always returns ``True``."""
    return True
| 80 | + |
def seek(self, offset, whence=Seek.set):
    # type: (int, SupportsInt) -> int
    """Change stream position.

    Change the stream position to the given byte offset. The
    offset is interpreted relative to the position indicated by
    ``whence``.

    Arguments:
        offset (int): the offset to the new position, in bytes.
        whence (int): the position reference. Possible values are:
            * `Seek.set`: start of stream (the default).
            * `Seek.current`: current position; offset may be negative.
            * `Seek.end`: end of stream; offset must be negative.

    Returns:
        int: the new absolute position.

    Raises:
        ValueError: when ``whence`` is not known, or ``offset``
            is invalid.

    Note:
        Zip compression does not support seeking, so the seeking
        is emulated. Seeking somewhere else than the current position
        will need to either:
            * reopen the file and restart decompression
            * read and discard data to advance in the file

        An offset relative to `Seek.end` that would land before the
        start of the stream is clamped to position 0.

    """
    _whence = int(whence)
    if _whence == Seek.set or _whence == Seek.current:
        target = offset + self._pos if _whence == Seek.current else offset
        if target < 0:
            raise ValueError("Negative seek position {}".format(target))
    elif _whence == Seek.end:
        if offset > 0:
            raise ValueError("Positive seek position {}".format(offset))
        # Without the clamp a negative target would reach ``read`` as a
        # negative size, which reads everything and leaves the stream at
        # the end instead of at the start.
        target = max(offset + self._end, 0)
    else:
        raise ValueError(
            "Invalid whence ({}, should be {}, {} or {})".format(
                _whence, Seek.set, Seek.current, Seek.end
            )
        )

    if target < self._pos:
        # Going backwards: restart decompression from the beginning.
        self._f = self._zip.open(self.name)  # type: ignore
        self._pos = 0
    # Going forwards: read and discard up to the requested position.
    self.read(target - self._pos)
    return self._pos
| 133 | + |
| 134 | + else: |
| 135 | + |
def seek(self, offset, whence=Seek.set):
    # type: (int, SupportsInt) -> int
    """Change stream position.

    Change the stream position to the given byte offset. The
    offset is interpreted relative to the position indicated by
    ``whence``.

    Arguments:
        offset (int): the offset to the new position, in bytes.
        whence (int): the position reference. Possible values are:
            * `Seek.set`: start of stream (the default).
            * `Seek.current`: current position; offset may be negative.
            * `Seek.end`: end of stream; offset must be negative.

    Returns:
        int: the new absolute position.

    Raises:
        ValueError: when ``whence`` is not known, or ``offset``
            is invalid.

    """
    _whence = int(whence)
    if _whence == Seek.set:
        # Absolute offset: the current position is irrelevant here.
        if offset < 0:
            raise ValueError("Negative seek position {}".format(offset))
    elif _whence == Seek.current:
        # Relative offset: only the resulting position must be non-negative.
        if self.tell() + offset < 0:
            raise ValueError("Negative seek position {}".format(offset))
    elif _whence == Seek.end:
        # Offset from the end: must not point past the end of the stream.
        if offset > 0:
            raise ValueError("Positive seek position {}".format(offset))
    else:
        raise ValueError(
            "Invalid whence ({}, should be {}, {} or {})".format(
                _whence, Seek.set, Seek.current, Seek.end
            )
        )

    # Delegate the actual repositioning to the wrapped stream.
    return self._f.seek(offset, _whence)
122 | 175 |
|
123 | 176 |
|
124 | 177 | class ZipFS(WrapFS): |
@@ -279,7 +332,7 @@ class ReadZipFS(FS): |
279 | 332 | """A readable zip file.""" |
280 | 333 |
|
281 | 334 | _meta = { |
282 | | - "case_insensitive": True, |
| 335 | + "case_insensitive": False, |
283 | 336 | "network": False, |
284 | 337 | "read_only": True, |
285 | 338 | "supports_rename": False, |
|
0 commit comments