@@ -89,7 +89,7 @@ async def _get_chunks(url: str, chunk_size: int) -> Generator[bytes, None, None]
8989 pbar .update (len (value ))
9090 pbar .close ()
9191 except JsException :
92- raise Exception (f"Failed to read dataset at { url } " ) from None
92+ raise Exception (f"Failed to read dataset at ' { url } '. " ) from None
9393 else :
9494 import requests # pyright: ignore
9595 from requests .exceptions import ConnectionError # pyright: ignore
@@ -99,7 +99,7 @@ async def _get_chunks(url: str, chunk_size: int) -> Generator[bytes, None, None]
9999 # If requests.get fails, it will return readable error
100100 if response .status_code >= 400 :
101101 raise Exception (
102- f"received status code { response .status_code } from { url } "
102+ f"received status code { response .status_code } from ' { url } '. "
103103 )
104104 pbar = tqdm (
105105 miniters = 1 ,
@@ -111,7 +111,7 @@ async def _get_chunks(url: str, chunk_size: int) -> Generator[bytes, None, None]
111111 pbar .update (len (chunk ))
112112 pbar .close ()
113113 except ConnectionError :
114- raise Exception (f"Failed to read dataset at { url } " ) from None
114+ raise Exception (f"Failed to read dataset at ' { url } '. " ) from None
115115
116116
117117def _rmrf (path : Path ) -> None :
@@ -126,7 +126,7 @@ def _verify_files_dont_exist(
126126) -> None :
127127 """
128128 Verifies all paths in 'paths' don't exist.
129- :param paths: A iterable of pathlib.Paths .
129+ :param paths: An iterable of pathlib.Path objects.
130130 :param remove_if_exist=False: Remove each file at each path in paths if they already exist.
131131 :returns: None
132132 :raises FileExistsError: On the first path found that already exists if remove_if_exist is False.
@@ -232,9 +232,9 @@ async def prepare(
232232 path = Path .cwd () if path is None else Path (path )
233233 # Check if path contains /tmp
234234 if Path ("/tmp" ) in path .parents :
235- raise ValueError ("path must not be in /tmp" )
235+ raise ValueError ("path must not be in /tmp. " )
236236 elif path .is_file ():
237- raise ValueError ("Datasets must be prepared to directories, not files" )
237+ raise ValueError ("Datasets must be prepared to directories, not files. " )
238238 # Create the target path if it doesn't exist yet
239239 path .mkdir (exist_ok = True )
240240
@@ -254,39 +254,45 @@ async def prepare(
254254 shutil .rmtree (extract_dir )
255255 extract_dir .mkdir ()
256256
257- if tarfile .is_tarfile (tmp_download_file ):
258- with tarfile .open (tmp_download_file ) as tf :
259- _verify_files_dont_exist (
260- [
261- path / child .name
262- for child in map (Path , tf .getnames ())
263- if len (child .parents ) == 1 and _is_file_to_symlink (child )
264- ], # Only check if top-level fileobject
265- remove_if_exist = overwrite ,
266- )
267- pbar = tqdm (iterable = tf .getmembers (), total = len (tf .getmembers ()))
268- pbar .set_description (f"Extracting { filename } " )
269- for member in pbar :
270- tf .extract (member = member , path = extract_dir )
271- tmp_download_file .unlink ()
272- elif zipfile .is_zipfile (tmp_download_file ):
273- with zipfile .ZipFile (tmp_download_file ) as zf :
274- _verify_files_dont_exist (
275- [
276- path / child .name
277- for child in map (Path , zf .namelist ())
278- if len (child .parents ) == 1 and _is_file_to_symlink (child )
279- ], # Only check if top-level fileobject
280- remove_if_exist = overwrite ,
281- )
282- pbar = tqdm (iterable = zf .infolist (), total = len (zf .infolist ()))
283- pbar .set_description (f"Extracting { filename } " )
284- for member in pbar :
285- zf .extract (member = member , path = extract_dir )
286- tmp_download_file .unlink ()
287- else :
288- _verify_files_dont_exist ([path / filename ], remove_if_exist = overwrite )
289- shutil .move (tmp_download_file , extract_dir / filename )
257+ try :
258+ if tarfile .is_tarfile (tmp_download_file ):
259+ with tarfile .open (tmp_download_file ) as tf :
260+ _verify_files_dont_exist (
261+ [
262+ path / child .name
263+ for child in map (Path , tf .getnames ())
264+ if len (child .parents ) == 1 and _is_file_to_symlink (child )
265+ ], # Only check if top-level fileobject
266+ remove_if_exist = overwrite ,
267+ )
268+ pbar = tqdm (iterable = tf .getmembers (), total = len (tf .getmembers ()))
269+ pbar .set_description (f"Extracting { filename } " )
270+ for member in pbar :
271+ tf .extract (member = member , path = extract_dir )
272+ tmp_download_file .unlink ()
273+ elif zipfile .is_zipfile (tmp_download_file ):
274+ with zipfile .ZipFile (tmp_download_file ) as zf :
275+ _verify_files_dont_exist (
276+ [
277+ path / child .name
278+ for child in map (Path , zf .namelist ())
279+ if len (child .parents ) == 1 and _is_file_to_symlink (child )
280+ ], # Only check if top-level fileobject
281+ remove_if_exist = overwrite ,
282+ )
283+ pbar = tqdm (iterable = zf .infolist (), total = len (zf .infolist ()))
284+ pbar .set_description (f"Extracting { filename } " )
285+ for member in pbar :
286+ zf .extract (member = member , path = extract_dir )
287+ tmp_download_file .unlink ()
288+ else :
289+ _verify_files_dont_exist ([path / filename ], remove_if_exist = overwrite )
290+ shutil .move (tmp_download_file , extract_dir / filename )
291+ except FileExistsError as e :
292+ raise FileExistsError (
293+ str (e )
294+ + "\n If you want to overwrite any existing files, use prepare(..., overwrite=True)."
295+ ) from None
290296
291297 # If in jupyterlite environment, the extract_dir = path, so the files are already there.
292298 if not _is_jupyterlite ():
@@ -304,29 +310,6 @@ def setup() -> None:
304310 if _is_jupyterlite ():
305311 tqdm .monitor_interval = 0
306312
307- try :
308- import sys # pyright: ignore
309-
310- ipython = get_ipython ()
311-
312- def hide_traceback (
313- exc_tuple = None ,
314- filename = None ,
315- tb_offset = None ,
316- exception_only = False ,
317- running_compiled_code = False ,
318- ):
319- etype , value , tb = sys .exc_info ()
320- value .__cause__ = None # suppress chained exceptions
321- return ipython ._showtraceback (
322- etype , value , ipython .InteractiveTB .get_exception_only (etype , value )
323- )
324-
325- ipython .showtraceback = hide_traceback
326-
327- except NameError :
328- pass
329-
330313
331314setup ()
332315
0 commit comments