Skip to content

Commit b93d6d1

Browse files
committed
Add a BaseBlobService.list_blob_names method
This can currently also be achieved via `[b.name for b in bbs.list_blobs()]`, but that parses the full XML and then discards most of the parsed information. With this change, listing the blob names is no longer CPU-bound for us.
1 parent 0a92c37 commit b93d6d1

File tree

3 files changed

+141
-3
lines changed

3 files changed

+141
-3
lines changed

azure-storage-blob/azure/storage/blob/_deserialization.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,77 @@ def _convert_xml_to_blob_list(response):
351351
return blob_list
352352

353353

354+
def _convert_xml_to_blob_name_list(response):
    '''
    Extracts only the names from a List Blobs response body.

    Only the <Name> child of each <BlobPrefix> and <Blob> element is read;
    every other property in the payload is ignored. Names of <BlobPrefix>
    elements are appended first, followed by names of <Blob> elements.

    Returns None when the response or its body is None. Otherwise returns a
    ``_list`` of names whose ``next_marker`` attribute holds the text of the
    <NextMarker> element. If the payload has no <Blobs> element the returned
    list is empty.

    Sample payload:

    <?xml version="1.0" encoding="utf-8"?>
    <EnumerationResults ServiceEndpoint="http://myaccount.blob.core.windows.net/" ContainerName="mycontainer">
      <Prefix>string-value</Prefix>
      <Marker>string-value</Marker>
      <MaxResults>int-value</MaxResults>
      <Delimiter>string-value</Delimiter>
      <Blobs>
        <Blob>
          <Name>blob-name</Name>
          <Deleted>true</Deleted>
          <Snapshot>date-time-value</Snapshot>
          <Properties>
            <Last-Modified>date-time-value</Last-Modified>
            <Etag>etag</Etag>
            <Content-Length>size-in-bytes</Content-Length>
            <Content-Type>blob-content-type</Content-Type>
            <Content-Encoding />
            <Content-Language />
            <Content-MD5 />
            <Cache-Control />
            <x-ms-blob-sequence-number>sequence-number</x-ms-blob-sequence-number>
            <BlobType>BlockBlob|PageBlob|AppendBlob</BlobType>
            <LeaseStatus>locked|unlocked</LeaseStatus>
            <LeaseState>available | leased | expired | breaking | broken</LeaseState>
            <LeaseDuration>infinite | fixed</LeaseDuration>
            <CopyId>id</CopyId>
            <CopyStatus>pending | success | aborted | failed </CopyStatus>
            <CopySource>source url</CopySource>
            <CopyProgress>bytes copied/bytes total</CopyProgress>
            <CopyCompletionTime>datetime</CopyCompletionTime>
            <CopyStatusDescription>error string</CopyStatusDescription>
            <AccessTier>P4 | P6 | P10 | P20 | P30 | P40 | P50 | P60 | Archive | Cool | Hot</AccessTier>
            <AccessTierChangeTime>date-time-value</AccessTierChangeTime>
            <AccessTierInferred>true</AccessTierInferred>
            <DeletedTime>datetime</DeletedTime>
            <RemainingRetentionDays>int</RemainingRetentionDays>
            <Creation-Time>date-time-value</Creation-Time>
          </Properties>
          <Metadata>
            <Name>value</Name>
          </Metadata>
        </Blob>
        <BlobPrefix>
          <Name>blob-prefix</Name>
        </BlobPrefix>
      </Blobs>
      <NextMarker />
    </EnumerationResults>
    '''
    if response is None or response.body is None:
        return None

    blob_list = _list()
    list_element = ETree.fromstring(response.body)

    blob_list.next_marker = list_element.findtext('NextMarker')

    blobs_element = list_element.find('Blobs')
    if blobs_element is None:
        # No <Blobs> container in the payload: nothing to enumerate. Without
        # this guard the findall() calls below would raise AttributeError.
        return blob_list

    # findall() always returns a list (possibly empty), so no None check is
    # needed. Tag order matters: prefixes are emitted before blobs.
    for tag in ('BlobPrefix', 'Blob'):
        for element in blobs_element.findall(tag):
            blob_list.append(element.findtext('Name'))

    return blob_list
423+
424+
354425
def _convert_xml_to_block_list(response):
355426
'''
356427
<?xml version="1.0" encoding="utf-8"?>

azure-storage-blob/azure/storage/blob/baseblobservice.py

Lines changed: 56 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
_convert_xml_to_containers,
5959
_parse_blob,
6060
_convert_xml_to_blob_list,
61+
_convert_xml_to_blob_name_list,
6162
_parse_container,
6263
_parse_snapshot_blob,
6364
_parse_lease,
@@ -1243,14 +1244,66 @@ def list_blobs(self, container_name, prefix=None, num_results=None, include=None
12431244
args = (container_name,)
12441245
kwargs = {'prefix': prefix, 'marker': marker, 'max_results': num_results,
12451246
'include': include, 'delimiter': delimiter, 'timeout': timeout,
1246-
'_context': operation_context}
1247+
'_context': operation_context,
1248+
'_converter': _convert_xml_to_blob_list}
1249+
resp = self._list_blobs(*args, **kwargs)
1250+
1251+
return ListGenerator(resp, self._list_blobs, args, kwargs)
1252+
1253+
def list_blob_names(self, container_name, prefix=None, num_results=None,
                    include=None, delimiter=None, marker=None,
                    timeout=None):
    '''
    Lists the names of the blobs under the specified container.

    The first page is requested immediately; the returned generator then
    lazily follows the continuation tokens returned by the service and
    stops when all blobs have been returned or num_results is reached.

    If num_results is specified and the container holds more than that
    number of blobs, the generator will have a populated next_marker field
    once it finishes. This marker can be used to create a new generator if
    more results are desired.

    :param str container_name:
        Name of existing container.
    :param str prefix:
        Filters the results to return only blobs whose names
        begin with the specified prefix.
    :param int num_results:
        Specifies the maximum number of blobs to return,
        including all :class:`BlobPrefix` elements. If the request does not
        specify num_results or specifies a value greater than 5,000, the
        server will return up to 5,000 items. Setting num_results to a value
        less than or equal to zero results in error response code 400
        (Bad Request).
    :param ~azure.storage.blob.models.Include include:
        Specifies one or more additional datasets to include in the response.
    :param str delimiter:
        When the request includes this parameter, the operation
        returns a :class:`~azure.storage.blob.models.BlobPrefix` element in
        the result list that acts as a placeholder for all blobs whose names
        begin with the same substring up to the appearance of the delimiter
        character. The delimiter may be a single character or a string.
    :param str marker:
        An opaque continuation token. This value can be retrieved from the
        next_marker field of a previous generator object if num_results was
        specified and that generator has finished enumerating results. If
        specified, this generator will begin returning results from the
        point where the previous generator stopped.
    :param int timeout:
        The timeout parameter is expressed in seconds.
    :return:
        A ListGenerator built from the first page of results.
    '''
    positional = (container_name,)
    # The same positional/keyword arguments are handed to ListGenerator so
    # it can re-invoke _list_blobs for subsequent pages.
    keyword = dict(
        prefix=prefix,
        marker=marker,
        max_results=num_results,
        include=include,
        delimiter=delimiter,
        timeout=timeout,
        _context=_OperationContext(location_lock=True),
        _converter=_convert_xml_to_blob_name_list,
    )
    first_page = self._list_blobs(*positional, **keyword)

    return ListGenerator(first_page, self._list_blobs, positional, keyword)
12501303

12511304
def _list_blobs(self, container_name, prefix=None, marker=None,
12521305
max_results=None, include=None, delimiter=None, timeout=None,
1253-
_context=None):
1306+
_context=None, _converter=None):
12541307
'''
12551308
Returns the list of blobs under the specified container.
12561309
@@ -1319,7 +1372,7 @@ def _list_blobs(self, container_name, prefix=None, marker=None,
13191372
'timeout': _int_to_str(timeout),
13201373
}
13211374

1322-
return self._perform_request(request, _convert_xml_to_blob_list, operation_context=_context)
1375+
return self._perform_request(request, _converter, operation_context=_context)
13231376

13241377
def get_blob_account_information(self, container_name=None, blob_name=None, timeout=None):
13251378
"""

tests/blob/test_container.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -706,6 +706,20 @@ def test_delete_container_with_lease_id(self):
706706
exists = self.bs.exists(container_name)
707707
self.assertFalse(exists)
708708

709+
@record
def test_list_names(self):
    # Arrange: a fresh container holding two small blobs.
    container_name = self._create_container()
    payload = b'hello world'
    for blob_name in ('blob1', 'blob2'):
        self.bs.create_blob_from_bytes(container_name, blob_name, payload)

    # Act
    names = list(self.bs.list_blob_names(container_name))

    # Assert
    self.assertEqual(names, ['blob1', 'blob2'])
721+
722+
709723
@record
710724
def test_list_blobs(self):
711725
# Arrange

0 commit comments

Comments
 (0)