@@ -146,6 +146,7 @@ async def download(
146146 >>> import skillsnetwork
147147 >>> path = "./my_file.txt"
148148 >>> await skillsnetwork.download("https://example.com/myfile", path)
149+ Saved as './my_file.txt'
149150 >>> with open(path, "r") as f:
150151 >>> content = f.read()
151152
@@ -166,7 +167,7 @@ async def download(
166167 async for chunk in _get_chunks (url , chunk_size ):
167168 f .write (chunk )
168169 if verbose :
169- print (relpath (path .resolve ()))
170+ print (f"Saved as ' { relpath (path .resolve ())} '" )
170171
171172
172173async def read (url : str , chunk_size : int = DEFAULT_CHUNK_SIZE ) -> bytes :
@@ -189,39 +190,46 @@ async def read(url: str, chunk_size: int = DEFAULT_CHUNK_SIZE) -> bytes:
189190async def prepare (url : str , path : Optional [str ] = None , verbose : bool = True ) -> None :
190191 """
191192 Prepares a dataset for learners. Downloads a dataset from the given url,
192- decompresses it if necessary, and symlinks it so it's available in the desired path.
193+ decompresses it if necessary. If not using jupyterlite, will extract to
194+ /tmp and symlink it so it's available at the desired path.
193195
194196 >>> import skillsnetwork
195197 >>> await skillsnetwork.prepare("https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-ML0187EN-SkillsNetwork/labs/module%203/images/images.tar.gz")
198+ Saved to '.'
196199
197200 :param url: The URL to download the dataset from.
198201 :param path: The path the dataset will be available at. Current working directory by default.
199202 :raise InvalidURLException: When URL is invalid.
200203 :raise FileExistsError: it raises this when a file to be symlinked already exists.
201- :raise ValueError: When requested path is in /tmp.
204+ :raise ValueError: When requested path is in /tmp, or when the dataset cannot be saved to path.
202205 """
203206
204207 filename = Path (urlparse (url ).path ).name
205208 path = Path .cwd () if path is None else Path (path )
206209 # Check if path contains /tmp
207210 if Path ("/tmp" ) in path .parents :
208211 raise ValueError ("path must not be in /tmp" )
212+ elif path .is_file ():
213+ raise ValueError ("Datasets must be prepared to directories, not files" )
209214 # Create the target path if it doesn't exist yet
210215 path .mkdir (exist_ok = True )
211216
212217 # For avoiding collisions with any other files the user may have downloaded to /tmp/
213- tmp_extract_dir = Path (f"/tmp/skills-network-{ hash (url )} " )
214- tmp_download_file = Path (f"/tmp/{ tmp_extract_dir .name } -{ filename } " )
218+
219+ dname = f"skills-network-{ hash (url )} "
220+ # The file to extract data to. If not jupyterlite, to be symlinked to as well
221+ extract_dir = path if _is_jupyterlite () else Path (f"/tmp/{ dname } " )
222+ # The file to download the (possibly) compressed data to
223+ tmp_download_file = Path (f"/tmp/{ dname } -{ filename } " )
215224 # Download the dataset to tmp_download_file file
216225 # File will be overwritten if it already exists
217226 await download (url , tmp_download_file , verbose = False )
218227
219- # Delete tmp_extract_dir directory if it already exists
220- if tmp_extract_dir .is_dir ():
221- shutil .rmtree (tmp_extract_dir )
222-
223- # Create tmp_extract_dir
224- tmp_extract_dir .mkdir ()
228+ # Delete extract_dir directory if it already exists
229+ if not _is_jupyterlite ():
230+ if extract_dir .is_dir ():
231+ shutil .rmtree (extract_dir )
232+ extract_dir .mkdir ()
225233
226234 if tarfile .is_tarfile (tmp_download_file ):
227235 with tarfile .open (tmp_download_file ) as tf :
@@ -235,7 +243,7 @@ async def prepare(url: str, path: Optional[str] = None, verbose: bool = True) ->
235243 pbar = tqdm (iterable = tf .getmembers (), total = len (tf .getmembers ()))
236244 pbar .set_description (f"Extracting { filename } " )
237245 for member in pbar :
238- tf .extract (member = member , path = tmp_extract_dir )
246+ tf .extract (member = member , path = extract_dir )
239247 tmp_download_file .unlink ()
240248 elif zipfile .is_zipfile (tmp_download_file ):
241249 with zipfile .ZipFile (tmp_download_file ) as zf :
@@ -249,18 +257,20 @@ async def prepare(url: str, path: Optional[str] = None, verbose: bool = True) ->
249257 pbar = tqdm (iterable = zf .infolist (), total = len (zf .infolist ()))
250258 pbar .set_description (f"Extracting { filename } " )
251259 for member in pbar :
252- zf .extract (member = member , path = tmp_extract_dir )
260+ zf .extract (member = member , path = extract_dir )
253261 tmp_download_file .unlink ()
254262 else :
255- _verify_files_dont_exist ([path / tmp_download_file . name ])
256- pass # No extraction necessary
263+ _verify_files_dont_exist ([path / filename ])
264+ shutil . move ( tmp_download_file , extract_dir / filename )
257265
258- # Now symlink top-level file objects in tmp_extract_dir
259- for child in filter (_is_file_to_symlink , tmp_extract_dir .iterdir ()):
260- (path / child .name ).symlink_to (child , target_is_directory = child .is_dir ())
266+ # If in jupyterlite environment, the extract_dir = path, so the files are already there.
267+ if not _is_jupyterlite ():
268+ # If not in jupyterlite environment, symlink top-level file objects in extract_dir
269+ for child in filter (_is_file_to_symlink , extract_dir .iterdir ()):
270+ (path / child .name ).symlink_to (child , target_is_directory = child .is_dir ())
261271
262272 if verbose :
263- print (relpath (path .resolve ()))
273+ print (f"Saved to ' { relpath (path .resolve ())} '" )
264274
265275
266276if _is_jupyterlite ():
0 commit comments