Skip to content

Commit 8169d0a

Browse files
author
Matt Sokoloff
committed
merge ms/bulk-export-status
2 parents 06c12b5 + 2cd9a4e commit 8169d0a

File tree

4 files changed

+91
-11
lines changed

4 files changed

+91
-11
lines changed

labelbox/client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ def upload_data(self,
261261
content: bytestring to upload
262262
filename: name of the upload
263263
content_type: content type of data uploaded
264-
sign: Whether or not to sign the url
264+
sign: whether or not to sign the url
265265
266266
Returns:
267267
str, the URL of uploaded data.

labelbox/schema/bulk_import_request.py

Lines changed: 59 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import json
22
import time
33
from uuid import UUID, uuid4
4+
import functools
45

56
import logging
67
from pathlib import Path
@@ -113,6 +114,60 @@ class BulkImportRequest(DbObject):
113114
project = Relationship.ToOne("Project")
114115
created_by = Relationship.ToOne("User", False, "created_by")
115116

117+
@property
118+
def inputs(self) -> Optional[List[Dict[str, str]]]:
119+
"""
120+
Inputs for each individual annotation uploaded.
121+
This should match the ndjson annotations that you have uploaded.
122+
123+
Returns:
124+
Uploaded ndjsons.
125+
126+
* This information will expire after 24 hours.
127+
"""
128+
return self._fetch_remote_ndjson(self.input_file_url)
129+
130+
@property
131+
def errors(self) -> Optional[List[Dict[str, str]]]:
132+
"""
133+
Errors for each individual annotation uploaded.
134+
135+
Returns:
136+
Empty list if there are no errors and None if the update is still running.
137+
If there are errors, and the job has completed then a list of dicts containing the error messages will be returned.
138+
139+
* This information will expire after 24 hours.
140+
"""
141+
return self._fetch_remote_ndjson(self.error_file_url)
142+
143+
@property
144+
def statuses(self) -> Optional[List[Dict[str, str]]]:
145+
"""
146+
Status for each individual annotation uploaded.
147+
148+
Returns:
149+
A status for each annotation if the upload has finished running and was successful. Otherwise it returns None.
150+
151+
* This information will expire after 24 hours.
152+
"""
153+
return self._fetch_remote_ndjson(self.status_file_url)
154+
155+
@functools.lru_cache()
156+
def _fetch_remote_ndjson(
157+
self, url: Optional[str]) -> Optional[List[Dict[str, str]]]:
158+
"""
159+
Fetches the remote ndjson file and caches the results.
160+
161+
Args:
162+
url (str): either the input_file_url, error_file_url, status_file_url, or None
163+
URLs are None when the file is unavailable.
164+
Returns:
165+
None if the url is None or the ndjson as a list of dicts.
166+
"""
167+
if url is not None:
168+
return ndjson.loads(requests.get(url).text)
169+
return None
170+
116171
def refresh(self) -> None:
117172
"""Synchronizes values of all fields with the database.
118173
"""
@@ -632,15 +687,14 @@ def validate_subclasses(cls, value, field):
632687
#Create uuid and datarow id so we don't have to define classification objects twice
633688
#This is caused by the fact that we require these ids for top level classifications but not for subclasses
634689
results = []
690+
dummy_id = 'child'.center(25, '_')
635691
for row in value:
636-
copied_row = row.copy()
637-
copied_row.update({
638-
'dataRow': {
639-
'id': 'child'.center(25, '_')
692+
results.append({
693+
**row, 'dataRow': {
694+
'id': dummy_id
640695
},
641696
'uuid': str(uuid4())
642697
})
643-
results.append(copied_row)
644698
return results
645699

646700

labelbox/schema/enums.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,19 @@
33

44
class BulkImportRequestState(Enum):
55
""" State of the import job when importing annotations (RUNNING, FAILED, or FINISHED).
6+
7+
.. list-table::
8+
:widths: 15 150
9+
:header-rows: 1
10+
11+
* - State
12+
- Description
13+
* - RUNNING
14+
- Indicates that the import job is not done yet.
15+
* - FAILED
16+
- Indicates the import job failed. Check `BulkImportRequest.errors` for more information.
17+
* - FINISHED
18+
- Indicates the import job is no longer running. Check `BulkImportRequest.statuses` for more information.
619
"""
720
RUNNING = "RUNNING"
821
FAILED = "FAILED"

tests/integration/bulk_import/test_bulk_import_request.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -121,17 +121,30 @@ def test_validate_ndjson_uuid(tmp_path, configured_project, predictions):
121121

122122

123123
@pytest.mark.slow
124-
def test_wait_till_done(configured_project):
124+
def test_wait_till_done(rectangle_inference, configured_project):
125125
name = str(uuid.uuid4())
126-
url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson"
126+
url = configured_project.client.upload_data(content=ndjson.dumps(
127+
[rectangle_inference]),
128+
sign=True)
127129
bulk_import_request = configured_project.upload_annotations(name=name,
128130
annotations=url,
129131
validate=False)
130132

131-
bulk_import_request.wait_until_done()
133+
assert bulk_import_request.errors is None
134+
assert bulk_import_request.statuses is None
135+
assert len(bulk_import_request.inputs) == 1
132136

133-
assert (bulk_import_request.state == BulkImportRequestState.FINISHED or
134-
bulk_import_request.state == BulkImportRequestState.FAILED)
137+
bulk_import_request.wait_until_done()
138+
assert bulk_import_request.state == BulkImportRequestState.FINISHED
139+
140+
#Check that the status files are being returned as expected
141+
assert len(bulk_import_request.errors) == 0
142+
assert len(bulk_import_request.inputs) == 1
143+
assert bulk_import_request.inputs[0]['uuid'] == rectangle_inference['uuid']
144+
assert len(bulk_import_request.statuses) == 1
145+
assert bulk_import_request.statuses[0]['status'] == 'SUCCESS'
146+
assert bulk_import_request.statuses[0]['uuid'] == rectangle_inference[
147+
'uuid']
135148

136149

137150
def assert_file_content(url: str, predictions):

0 commit comments

Comments
 (0)