Skip to content

Commit 8169d0a

Browse files
author
Matt Sokoloff
committed
merge ms/bulk-export-status
2 parents 06c12b5 + 2cd9a4e commit 8169d0a

File tree

4 files changed

+91
-11
lines changed

4 files changed

+91
-11
lines changed

labelbox/client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ def upload_data(self,
261261
content: bytestring to upload
262262
filename: name of the upload
263263
content_type: content type of data uploaded
264-
sign: Whether or not to sign the url
264+
sign: whether or not to sign the url
265265
266266
Returns:
267267
str, the URL of uploaded data.

labelbox/schema/bulk_import_request.py

Lines changed: 59 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import json
22
import time
33
from uuid import UUID, uuid4
4+
import functools
45

56
import logging
67
from pathlib import Path
@@ -113,6 +114,60 @@ class BulkImportRequest(DbObject):
113114
project = Relationship.ToOne("Project")
114115
created_by = Relationship.ToOne("User", False, "created_by")
115116

117+
@property
118+
def inputs(self) -> Optional[List[Dict[str, str]]]:
119+
"""
120+
Inputs for each individual annotation uploaded.
121+
This should match the ndjson annotations that you have uploaded.
122+
123+
Returns:
124+
Uploaded ndjsons.
125+
126+
* This information will expire after 24 hours.
127+
"""
128+
return self._fetch_remote_ndjson(self.input_file_url)
129+
130+
@property
131+
def errors(self) -> Optional[List[Dict[str, str]]]:
132+
"""
133+
Errors for each individual annotation uploaded.
134+
135+
Returns:
136+
Empty list if there are no errors and None if the update is still running.
137+
If there are errors, and the job has completed then a list of dicts containing the error messages will be returned.
138+
139+
* This information will expire after 24 hours.
140+
"""
141+
return self._fetch_remote_ndjson(self.error_file_url)
142+
143+
@property
144+
def statuses(self) -> Optional[List[Dict[str, str]]]:
145+
"""
146+
Status for each individual annotation uploaded.
147+
148+
Returns:
149+
A status for each annotation if the upload has finished running and was successful. Otherwise it returns None.
150+
151+
* This information will expire after 24 hours.
152+
"""
153+
return self._fetch_remote_ndjson(self.status_file_url)
154+
155+
@functools.lru_cache()
156+
def _fetch_remote_ndjson(
157+
self, url: Optional[str]) -> Optional[List[Dict[str, str]]]:
158+
"""
159+
Fetches the remote ndjson file and caches the results.
160+
161+
Args:
162+
url (str): either the input_file_url, error_file_url, status_file_url, or None
163+
URLs are None when the file is unavailable.
164+
Returns:
165+
None if the url is None or the ndjson as a list of dicts.
166+
"""
167+
if url is not None:
168+
return ndjson.loads(requests.get(url).text)
169+
return None
170+
116171
def refresh(self) -> None:
117172
"""Synchronizes values of all fields with the database.
118173
"""
@@ -632,15 +687,14 @@ def validate_subclasses(cls, value, field):
632687
#Create uuid and datarow id so we don't have to define classification objects twice
633688
#This is caused by the fact that we require these ids for top level classifications but not for subclasses
634689
results = []
690+
dummy_id = 'child'.center(25, '_')
635691
for row in value:
636-
copied_row = row.copy()
637-
copied_row.update({
638-
'dataRow': {
639-
'id': 'child'.center(25, '_')
692+
results.append({
693+
**row, 'dataRow': {
694+
'id': dummy_id
640695
},
641696
'uuid': str(uuid4())
642697
})
643-
results.append(copied_row)
644698
return results
645699

646700

labelbox/schema/enums.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,19 @@
33

44
class BulkImportRequestState(Enum):
55
""" State of the import job when importing annotations (RUNNING, FAILED, or FINISHED).
6+
7+
.. list-table::
8+
:widths: 15 150
9+
:header-rows: 1
10+
11+
* - State
12+
- Description
13+
* - RUNNING
14+
- Indicates that the import job is not done yet.
15+
* - FAILED
16+
- Indicates the import job failed. Check `BulkImportRequest.errors` for more information.
17+
* - FINISHED
18+
- Indicates the import job is no longer running. Check `BulkImportRequest.statuses` for more information.
619
"""
720
RUNNING = "RUNNING"
821
FAILED = "FAILED"

tests/integration/bulk_import/test_bulk_import_request.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -121,17 +121,30 @@ def test_validate_ndjson_uuid(tmp_path, configured_project, predictions):
121121

122122

123123
@pytest.mark.slow
124-
def test_wait_till_done(configured_project):
124+
def test_wait_till_done(rectangle_inference, configured_project):
125125
name = str(uuid.uuid4())
126-
url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson"
126+
url = configured_project.client.upload_data(content=ndjson.dumps(
127+
[rectangle_inference]),
128+
sign=True)
127129
bulk_import_request = configured_project.upload_annotations(name=name,
128130
annotations=url,
129131
validate=False)
130132

131-
bulk_import_request.wait_until_done()
133+
assert bulk_import_request.errors is None
134+
assert bulk_import_request.statuses is None
135+
assert len(bulk_import_request.inputs) == 1
132136

133-
assert (bulk_import_request.state == BulkImportRequestState.FINISHED or
134-
bulk_import_request.state == BulkImportRequestState.FAILED)
137+
bulk_import_request.wait_until_done()
138+
assert bulk_import_request.state == BulkImportRequestState.FINISHED
139+
140+
#Check that the status files are being returned as expected
141+
assert len(bulk_import_request.errors) == 0
142+
assert len(bulk_import_request.inputs) == 1
143+
assert bulk_import_request.inputs[0]['uuid'] == rectangle_inference['uuid']
144+
assert len(bulk_import_request.statuses) == 1
145+
assert bulk_import_request.statuses[0]['status'] == 'SUCCESS'
146+
assert bulk_import_request.statuses[0]['uuid'] == rectangle_inference[
147+
'uuid']
135148

136149

137150
def assert_file_content(url: str, predictions):

0 commit comments

Comments
 (0)