Skip to content

Commit ae01f46

Browse files
authored
[SYNPY-1613] allow download from presigned url (#1249)
* allow download from presigned url
1 parent 63da840 commit ae01f46

File tree

4 files changed

+1098
-88
lines changed

4 files changed

+1098
-88
lines changed

synapseclient/core/download/download_async.py

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -49,21 +49,23 @@ class DownloadRequest(NamedTuple):
4949
A request to download a file from Synapse
5050
5151
Attributes:
52-
file_handle_id : The file handle ID to download.
53-
object_id : The Synapse object this file associated to.
52+
file_handle_id : The file handle ID to download. Defaults to None.
53+
object_id : The Synapse object this file is associated with. Defaults to None.
5454
object_type : The type of the associated Synapse object. Any of
55-
<https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/model/file/FileHandleAssociateType.html>
55+
<https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/model/file/FileHandleAssociateType.html>. Defaults to None.
5656
path : The local path to download the file to.
5757
This path can be either an absolute path or
58-
a relative path from where the code is executed to the download location.
59-
debug: A boolean to specify if debug mode is on.
58+
a relative path from where the code is executed to the download location. Defaults to None.
59+
debug: A boolean to specify if debug mode is on. Defaults to False.
60+
presigned_url: Optional information about a presigned url to download the file. Defaults to None.
6061
"""
6162

62-
file_handle_id: int
63-
object_id: str
64-
object_type: str
65-
path: str
63+
file_handle_id: int = None
64+
object_id: str = None
65+
object_type: str = None
66+
path: str = None
6667
debug: bool = False
68+
presigned_url: Optional["PresignedUrlInfo"] = None
6769

6870

6971
async def download_file(
@@ -295,7 +297,16 @@ async def download_file(self) -> None:
295297
"""
296298
Splits up and downloads a file in chunks from a URL.
297299
"""
298-
url_provider = PresignedUrlProvider(self._syn, request=self._download_request)
300+
if self._download_request.presigned_url is not None:
301+
url_provider = PresignedUrlProvider(
302+
self._syn,
303+
request=self._download_request,
304+
_cached_info=self._download_request.presigned_url,
305+
)
306+
else:
307+
url_provider = PresignedUrlProvider(
308+
self._syn, request=self._download_request
309+
)
299310

300311
file_size = await with_retry_time_based_async(
301312
function=lambda: _get_file_size_wrapper(
@@ -307,9 +318,14 @@ async def download_file(self) -> None:
307318
retry_max_wait_before_failure=30,
308319
read_response_content=False,
309320
)
321+
# set postfix to object_id if not presigned url, otherwise set to file_name
322+
if self._download_request.presigned_url is None:
323+
postfix = self._download_request.object_id
324+
else:
325+
postfix = self._download_request.presigned_url.file_name
310326
self._progress_bar = get_or_create_download_progress_bar(
311327
file_size=file_size,
312-
postfix=self._download_request.object_id,
328+
postfix=postfix,
313329
synapse_client=self._syn,
314330
)
315331
self._prep_file()

synapseclient/core/download/download_functions.py

Lines changed: 64 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from synapseclient.core.download import (
3232
SYNAPSE_DEFAULT_DOWNLOAD_PART_SIZE,
3333
DownloadRequest,
34+
PresignedUrlInfo,
3435
PresignedUrlProvider,
3536
_pre_signed_url_expiration_time,
3637
download_file,
@@ -681,30 +682,31 @@ def download_fn(
681682

682683

683684
async def download_from_url_multi_threaded(
684-
file_handle_id: str,
685-
object_id: str,
686-
object_type: str,
687685
destination: str,
686+
file_handle_id: Optional[str] = None,
687+
object_id: Optional[str] = None,
688+
object_type: Optional[str] = None,
688689
*,
689690
expected_md5: str = None,
690691
synapse_client: Optional["Synapse"] = None,
692+
presigned_url: Optional[PresignedUrlInfo] = None,
691693
) -> str:
692694
"""
693695
Download a file from the given URL using multiple threads.
694696
695697
Arguments:
696-
file_handle_id: The id of the FileHandle to download
697-
object_id: The id of the Synapse object that uses the FileHandle
698+
destination: The destination on local file system
699+
file_handle_id: Optional. The id of the FileHandle to download
700+
object_id: Optional. The id of the Synapse object that uses the FileHandle
698701
e.g. "syn123"
699-
object_type: The type of the Synapse object that uses the
702+
object_type: Optional. The type of the Synapse object that uses the
700703
FileHandle e.g. "FileEntity". Any of
701704
<https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/model/file/FileHandleAssociateType.html>
702-
destination: The destination on local file system
703705
expected_md5: The expected MD5
704-
content_size: The size of the content
705706
synapse_client: If not passed in and caching was not disabled by
706707
`Synapse.allow_client_caching(False)` this will use the last created
707708
instance from the Synapse class constructor.
709+
presigned_url: Optional. A PresignedUrlInfo object. If given, the file is downloaded using this pre-signed URL; a SynapseError is raised if the URL has expired or is missing its file name.
708710
Raises:
709711
SynapseMd5MismatchError: If the actual MD5 does not match expected MD5.
710712
@@ -718,19 +720,38 @@ async def download_from_url_multi_threaded(
718720
temp_destination = utils.temp_download_filename(
719721
destination=destination, file_handle_id=file_handle_id
720722
)
723+
# check if the presigned url is expired
724+
if presigned_url is not None:
725+
if (
726+
presigned_url.expiration_utc
727+
< datetime.datetime.now(tz=datetime.timezone.utc)
728+
+ PresignedUrlProvider._TIME_BUFFER
729+
):
730+
raise SynapseError(
731+
"The provided pre-signed URL has expired. Please provide a new pre-signed URL."
732+
)
721733

722-
request = DownloadRequest(
723-
file_handle_id=int(file_handle_id),
724-
object_id=object_id,
725-
object_type=object_type,
726-
path=temp_destination,
727-
debug=client.debug,
728-
)
729-
730-
await download_file(
731-
client=client,
732-
download_request=request,
733-
)
734+
if not presigned_url.file_name:
735+
raise SynapseError("The provided pre-signed URL is missing the file name.")
736+
737+
if os.path.isdir(destination):
738+
# If the destination is a directory, then the file name should be the same as the file name in the presigned url
739+
# This is added to ensure the temp file can be copied to the desired destination without changing the file name
740+
destination = os.path.join(destination, presigned_url.file_name)
741+
request = DownloadRequest(
742+
path=temp_destination,
743+
debug=client.debug,
744+
presigned_url=presigned_url,
745+
)
746+
else:
747+
request = DownloadRequest(
748+
file_handle_id=int(file_handle_id),
749+
object_id=object_id,
750+
object_type=object_type,
751+
path=temp_destination,
752+
debug=client.debug,
753+
)
754+
await download_file(client=client, download_request=request)
734755

735756
if expected_md5: # if md5 not set (should be the case for all except http download)
736757
actual_md5 = utils.md5_for_file_hex(filename=temp_destination)
@@ -753,11 +774,12 @@ async def download_from_url_multi_threaded(
753774
def download_from_url(
754775
url: str,
755776
destination: str,
756-
entity_id: Optional[str],
757-
file_handle_associate_type: Optional[str],
777+
entity_id: Optional[str] = None,
778+
file_handle_associate_type: Optional[str] = None,
758779
file_handle_id: Optional[str] = None,
759780
expected_md5: Optional[str] = None,
760781
progress_bar: Optional[tqdm] = None,
782+
url_is_presigned: Optional[bool] = False,
761783
*,
762784
synapse_client: Optional["Synapse"] = None,
763785
) -> Union[str, None]:
@@ -767,15 +789,17 @@ def download_from_url(
767789
Arguments:
768790
url: The source of download
769791
destination: The destination on local file system
770-
entity_id: The id of the Synapse object that uses the FileHandle
792+
entity_id: Optional. The id of the Synapse object that uses the FileHandle
771793
e.g. "syn123"
772-
file_handle_associate_type: The type of the Synapse object that uses the
794+
file_handle_associate_type: Optional. The type of the Synapse object that uses the
773795
FileHandle e.g. "FileEntity". Any of
774796
<https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/model/file/FileHandleAssociateType.html>
775797
file_handle_id: Optional. If given, the file will be given a temporary name that includes the file
776798
handle id which allows resuming partial downloads of the same file from previous
777799
sessions
778800
expected_md5: Optional. If given, check that the MD5 of the downloaded file matches the expected MD5
801+
progress_bar: Optional progress bar to update during download
802+
url_is_presigned: If True, the URL is already a pre-signed URL; if it has expired, a SynapseError is raised instead of requesting a new URL. Defaults to False.
779803
synapse_client: If not passed in and caching was not disabled by
780804
`Synapse.allow_client_caching(False)` this will use the last created
781805
instance from the Synapse class constructor.
@@ -797,7 +821,10 @@ def download_from_url(
797821
actual_md5 = None
798822
redirect_count = 0
799823
delete_on_md5_mismatch = True
800-
client.logger.debug(f"[{entity_id}]: Downloading from {url} to {destination}")
824+
if entity_id:
825+
client.logger.debug(f"[{entity_id}]: Downloading from {url} to {destination}")
826+
else:
827+
client.logger.debug(f"Downloading from {url} to {destination}")
801828
span = trace.get_current_span()
802829

803830
if file_handle_id:
@@ -904,13 +931,18 @@ def _ftp_report_hook(
904931
url
905932
)
906933
if url_is_expired:
907-
response = get_file_handle_for_download(
908-
file_handle_id=file_handle_id,
909-
synapse_id=entity_id,
910-
entity_type=file_handle_associate_type,
911-
synapse_client=client,
912-
)
913-
url = response["preSignedURL"]
934+
if url_is_presigned:
935+
raise SynapseError(
936+
"The provided pre-signed URL has expired. Please provide a new pre-signed URL."
937+
)
938+
else:
939+
response = get_file_handle_for_download(
940+
file_handle_id=file_handle_id,
941+
synapse_id=entity_id,
942+
entity_type=file_handle_associate_type,
943+
synapse_client=client,
944+
)
945+
url = response["preSignedURL"]
914946
response = with_retry(
915947
lambda url=url, range_header=range_header, auth=auth: client._requests_session.get(
916948
url=url,

0 commit comments

Comments
 (0)