@@ -85,7 +85,7 @@ def _is_s3_dir(self, dir_path):
8585 prefix = util .ensure_suffix (dir_path , "/" )
8686 return self ._is_s3_prefix (prefix )
8787
88- def _get_matching_s3_objects_generator (self , prefix = "" , suffix = "" ):
88+ def _get_matching_s3_objects_generator (self , prefix = "" , suffix = "" , include_dir_objects = False ):
8989 kwargs = {"Bucket" : self .bucket , "Prefix" : prefix }
9090
9191 while True :
@@ -97,16 +97,20 @@ def _get_matching_s3_objects_generator(self, prefix="", suffix=""):
9797
9898 for obj in contents :
9999 key = obj ["Key" ]
100- if key .startswith (prefix ) and key .endswith (suffix ):
100+ if (
101+ key .startswith (prefix )
102+ and key .endswith (suffix )
103+ and (include_dir_objects or not key .endswith ("/" ))
104+ ):
101105 yield obj
102106
103107 try :
104108 kwargs ["ContinuationToken" ] = resp ["NextContinuationToken" ]
105109 except KeyError :
106110 break
107111
def _get_matching_s3_keys_generator(self, prefix="", suffix="", include_dir_objects=False):
    """Yield ``(key, last_modified)`` pairs for matching S3 objects.

    This is a thin projection over ``_get_matching_s3_objects_generator``:
    all filtering is delegated to it, and only the ``"Key"`` and
    ``"LastModified"`` fields of each yielded object are passed on.

    Args:
        prefix: Forwarded unchanged to the objects generator.
        suffix: Forwarded unchanged to the objects generator.
        include_dir_objects: Forwarded unchanged to the objects generator.
            Defaults to ``False`` so existing two-argument callers keep
            working.

    Yields:
        Tuples of ``(obj["Key"], obj["LastModified"])`` in the order the
        objects generator produces them.
    """
    matching_objects = self._get_matching_s3_objects_generator(
        prefix, suffix, include_dir_objects=include_dir_objects
    )
    for obj in matching_objects:
        yield obj["Key"], obj["LastModified"]
111115
112116 def put_object (self , body , key ):
@@ -146,18 +150,15 @@ def _read_bytes_from_s3_single(self, key, allow_missing=False, ext_bucket=None):
146150
147151 return byte_array .strip ()
148152
def search(
    self, prefix="", suffix="", include_dir_objects=False
) -> Tuple[List[str], List[datetime.datetime]]:
    """Collect matching S3 keys and their timestamps into parallel lists.

    Results are streamed straight from ``_get_matching_s3_keys_generator``;
    no post-filtering is applied here, so whether keys ending in ``"/"``
    appear in the output is decided solely by ``include_dir_objects``.

    Args:
        prefix: Forwarded to the keys generator.
        suffix: Forwarded to the keys generator.
        include_dir_objects: Forwarded to the keys generator. Defaults to
            ``False``.

    Returns:
        A ``(paths, timestamps)`` tuple of equal-length lists, where
        ``timestamps[i]`` is the value yielded alongside ``paths[i]``.
    """
    paths: List[str] = []
    timestamps: List[datetime.datetime] = []

    matches = self._get_matching_s3_keys_generator(
        prefix, suffix, include_dir_objects=include_dir_objects
    )
    for key, ts in matches:
        paths.append(key)
        timestamps.append(ts)

    return paths, timestamps
163164
@@ -217,12 +218,14 @@ def download_dir(self, prefix, local_dir):
def download_dir_contents(self, prefix, local_dir):
    """Mirror every object under an S3 prefix into a local directory.

    Keys ending in ``"/"`` are requested explicitly
    (``include_dir_objects=True``) and turned into local directories
    rather than downloaded, so they get a local counterpart even when
    nothing is stored beneath them.

    Args:
        prefix: S3 key prefix to mirror; passed through
            ``util.ensure_suffix(prefix, "/")`` before listing.
        local_dir: Local directory that receives the contents; passed to
            ``util.mkdir_p`` before anything is listed.
    """
    util.mkdir_p(local_dir)
    prefix = util.ensure_suffix(prefix, "/")
    for key, _ in self._get_matching_s3_keys_generator(prefix, include_dir_objects=True):
        # NOTE(review): presumably trim_prefix strips the leading `prefix`
        # from `key`, leaving a path relative to it -- confirm in util.
        rel_path = util.trim_prefix(key, prefix)
        local_dest_path = os.path.join(local_dir, rel_path)

        # Classify on the S3 key rather than on the joined local path:
        # when rel_path is empty (the key equals the prefix itself),
        # os.path.join appends os.sep, which is not "/" on every platform.
        if key.endswith("/"):
            # dirname() drops the empty trailing component, leaving the
            # directory path itself.
            util.mkdir_p(os.path.dirname(local_dest_path))
        else:
            self.download_file(key, local_dest_path)
226229
227230 def download_and_unzip (self , key , local_dir ):
228231 util .mkdir_p (local_dir )
0 commit comments