Skip to content

Commit 25d9c70

Browse files
geoffjukes authored and willmcgugan committed
Add ContentType detection and Tests for upload ExtraArgs (#35)
* enable server-side bucket to bucket copies The 'copy_object' method permits setting different source and target buckets. When different, a server-side copy takes place between the buckets. Appropriate ACLs must exist in AWS, but this is a quick method for copying keys between buckets within the same account and region. Copying across regions and/or accounts is more complex, and is not enabled with this change. * Allow settable extra_args for uploads and downloads Allow setting upload and download `extra_args` values for the lifetime of the filesystem instance. Ticket #32 has details * Moved args to constructor and removed bucket copy * load unquoted json if value is a string * Expose `acl` and `cache-control` directly and via opener * Small cleanup * (bugfix) Only set if value exists * Boto3 changed from `extra_args` to `ExtraArgs` at some point This has been tested and works. I uploaded a file to a private bucket, setting the max-age and ACL. * Add ContentType Detection with fallback The boto3 `upload_fileobj` never guesses the ContentType (unlike other Boto3 methods). So all files are stored as `binary/octet-stream`. This can be problematic when uploading images or audio, and expecting it to load inline. `guess_type` is a lightweight guesser that only looks at filenames. If this implementation fails to identify a type, it falls back to the default `binary/octet-stream` * moved ContentType to ExtraArgs The `client.upload_fileobj` method has `ContentType` in ExtraArgs. * moved _upload_args to method to inject ContentType * Add tests for upload ExtraArgs * fix typo * Fix TypeError if ContentType manually set * fixes #38
1 parent 6d04cd1 commit 25d9c70

File tree

2 files changed

+23
-5
lines changed

2 files changed

+23
-5
lines changed

fs_s3fs/_s3fs.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from ssl import SSLError
1313
import tempfile
1414
import threading
15+
import mimetypes
1516
import json
1617

1718
import boto3
@@ -353,6 +354,13 @@ def _get_object(self, path, key):
353354
else:
354355
return obj
355356

357+
def _upload_args(self, key):
    """Build the extra upload arguments for ``key``.

    The result contains every entry of the instance-wide
    ``upload_args`` plus a ``ContentType``. A ``ContentType`` that is
    already present in ``upload_args`` is kept as-is; otherwise it is
    guessed from the key's file name, falling back to
    ``binary/octet-stream`` when the name gives no hint.

    :param key: the S3 key (only its name/extension is inspected).
    :returns: a new ``dict``; modifying it never changes
        ``self.upload_args``.
    """
    # Always hand out a copy so callers cannot mutate the shared
    # instance-level configuration; an unset (None) value counts as empty.
    upload_args = dict(self.upload_args or {})
    if 'ContentType' not in upload_args:
        # guess_type only looks at the file name and yields None for
        # the type when the extension is unknown.
        mimetype = mimetypes.guess_type(key)[0] or 'binary/octet-stream'
        upload_args['ContentType'] = mimetype
    return upload_args
363+
356364
@property
357365
def s3(self):
358366
if not hasattr(self._tlocal, 's3'):
@@ -535,7 +543,7 @@ def makedir(self, path, permissions=None, recreate=False):
535543
else:
536544
raise errors.DirectoryExists(path)
537545
with s3errors(path):
538-
self.s3.Object(self._bucket_name, _key).put()
546+
self.s3.Object(self._bucket_name, _key).put(**self._upload_args(_key))
539547
return SubFS(self, path)
540548

541549
def openbin(self, path, mode="r", buffering=-1, **options):
@@ -553,7 +561,7 @@ def on_close_create(s3file):
553561
s3file.raw.seek(0)
554562
with s3errors(path):
555563
self.client.upload_fileobj(
556-
s3file.raw, self._bucket_name, _key, ExtraArgs=self.upload_args
564+
s3file.raw, self._bucket_name, _key, ExtraArgs=self._upload_args(_key)
557565
)
558566
finally:
559567
s3file.raw.close()
@@ -602,7 +610,7 @@ def on_close(s3file):
602610
s3file.raw.seek(0, os.SEEK_SET)
603611
with s3errors(path):
604612
self.client.upload_fileobj(
605-
s3file.raw, self._bucket_name, _key, ExtraArgs=self.upload_args
613+
s3file.raw, self._bucket_name, _key, ExtraArgs=self._upload_args(_key)
606614
)
607615
finally:
608616
s3file.raw.close()
@@ -771,7 +779,7 @@ def setbytes(self, path, contents):
771779
bytes_file = io.BytesIO(contents)
772780
with s3errors(path):
773781
self.client.upload_fileobj(
774-
bytes_file, self._bucket_name, _key, ExtraArgs=self.upload_args
782+
bytes_file, self._bucket_name, _key, ExtraArgs=self._upload_args(_key)
775783
)
776784

777785
def setbinfile(self, path, file):
@@ -789,7 +797,7 @@ def setbinfile(self, path, file):
789797
pass
790798

791799
with s3errors(path):
792-
self.client.upload_fileobj(file, self._bucket_name, _key, ExtraArgs=self.upload_args)
800+
self.client.upload_fileobj(file, self._bucket_name, _key, self._upload_args(_key))
793801

794802
def copy(self, src_path, dst_path, overwrite=False):
795803
if not overwrite and self.exists(dst_path):

fs_s3fs/tests/test_s3fs.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,13 @@ def test_path_to_key_subdir(self):
6969
self.assertEqual(s3._path_to_key('foo.bar'), 'dir/foo.bar')
7070
self.assertEqual(s3._path_to_key('foo/bar'), 'dir/foo/bar')
7171

72+
def test_upload_args(self):
    """``_upload_args`` keeps ACL/CacheControl and adds a guessed ContentType."""
    s3 = S3FS('foo', acl='acl', cache_control='cc')
    # (key name, ContentType expected next to the configured arguments)
    cases = [
        ('test.jpg', 'image/jpeg'),
        ('test.mp3', 'audio/mpeg'),
        ('test.json', 'application/json'),
        ('unknown.ext', 'binary/octet-stream'),
    ]
    for filename, content_type in cases:
        expected = {'ACL': 'acl', 'CacheControl': 'cc', 'ContentType': content_type}
        self.assertDictEqual(s3._upload_args(filename), expected)

0 commit comments

Comments
 (0)