Skip to content

Commit fe3af35

Browse files
authored
Merge pull request #634 from Labelbox/mno/AL-2849
[AL-2849] Add upsert_predictions_and_send_to_project method
2 parents 937599b + 2eafbb3 commit fe3af35

File tree

5 files changed

+291
-9
lines changed

5 files changed

+291
-9
lines changed

labelbox/orm/model.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,7 @@ class Entity(metaclass=EntityMeta):
371371
LabelingFrontendOptions: Type[labelbox.LabelingFrontendOptions]
372372
Label: Type[labelbox.Label]
373373
MEAPredictionImport: Type[labelbox.MEAPredictionImport]
374+
MALPredictionImport: Type[labelbox.MALPredictionImport]
374375
Invite: Type[labelbox.Invite]
375376
InviteLimit: Type[labelbox.InviteLimit]
376377
ProjectRole: Type[labelbox.ProjectRole]

labelbox/schema/annotation_import.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,88 @@ def _create_mea_import_from_bytes(
318318
return cls(client, res["createModelErrorAnalysisPredictionImport"])
319319

320320

321+
class MEAToMALPredictionImport(AnnotationImport):
    """Import job that copies a model run's (MEA) predictions into a project
    as MAL pre-labels."""
    project = Relationship.ToOne("Project", cache=True)

    @property
    def parent_id(self) -> str:
        """
        Identifier for this import. Used to refresh the status
        """
        return self.project().uid

    @classmethod
    def create_for_model_run_data_rows(cls, client: "labelbox.Client",
                                       model_run_id: str,
                                       data_row_ids: List[str], project_id: str,
                                       name: str) -> "MEAToMALPredictionImport":
        """
        Create an MEA to MAL prediction import job from a list of data row ids of a specific model run

        Args:
            client: Labelbox Client for executing queries
            model_run_id: model run id
            data_row_ids: A list of data row ids
            project_id: id of the project to import the predictions into
            name: name of the import job
        Returns:
            MEAToMALPredictionImport
        """
        query_str = cls._get_model_run_data_rows_mutation()
        return cls(
            client,
            client.execute(query_str,
                           params={
                               "dataRowIds": data_row_ids,
                               "modelRunId": model_run_id,
                               "projectId": project_id,
                               "name": name
                           })["createMalPredictionImportForModelRunDataRows"])

    @classmethod
    def from_name(cls,
                  client: "labelbox.Client",
                  project_id: str,
                  name: str,
                  as_json: bool = False) -> "MEAToMALPredictionImport":
        """
        Retrieves an MEA to MAL import job.

        Args:
            client: Labelbox Client for executing queries
            project_id: ID used for querying import jobs
            name: Name of the import job.
            as_json: if True, return the raw response dict instead of an instance
        Returns:
            MEAToMALPredictionImport
        Raises:
            labelbox.exceptions.ResourceNotFoundError: if no matching import job exists
        """
        query_str = """query getMEAToMALPredictionImportPyApi($projectId : ID!, $name: String!) {
            meaToMalPredictionImport(
                where: {projectId: $projectId, name: $name}){
                    %s
                }}""" % query.results_query_part(cls)
        params = {
            "projectId": project_id,
            "name": name,
        }
        response = client.execute(query_str, params)
        if response is None:
            # Report this class (not MALPredictionImport) as the missing resource.
            raise labelbox.exceptions.ResourceNotFoundError(
                MEAToMALPredictionImport, params)
        response = response["meaToMalPredictionImport"]
        if as_json:
            return response
        return cls(client, response)

    @classmethod
    def _get_model_run_data_rows_mutation(cls) -> str:
        # Mutation that creates the MAL prediction import from model-run data rows.
        return """mutation createMalPredictionImportForModelRunDataRowsPyApi($dataRowIds: [ID!]!, $name: String!, $modelRunId: ID!, $projectId:ID!) {
            createMalPredictionImportForModelRunDataRows(data: {
                name: $name
                modelRunId: $modelRunId
                dataRowIds: $dataRowIds
                projectId: $projectId
            }) {%s}
        }""" % query.results_query_part(cls)
321403
class MALPredictionImport(AnnotationImport):
322404
project = Relationship.ToOne("Project", cache=True)
323405

labelbox/schema/model_run.py

Lines changed: 61 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818

1919
logger = logging.getLogger(__name__)
2020

21+
DATAROWS_IMPORT_LIMIT = 25000
22+
2123

2224
class DataSplit(Enum):
2325
TRAINING = "TRAINING"
@@ -123,6 +125,56 @@ def _wait_until_done(self, status_fn, timeout_seconds=120, sleep_time=5):
123125
)
124126
time.sleep(sleep_time)
125127

128+
def upsert_predictions_and_send_to_project(
129+
self,
130+
name: str,
131+
predictions: Union[str, Path, Iterable[Dict]],
132+
project_id: str,
133+
priority: Optional[int] = 5,
134+
) -> 'MEAPredictionImport': # type: ignore
135+
""" Upload predictions and create a batch import to project.
136+
Args:
137+
name (str): name of the AnnotationImport job as well as the name of the batch import
138+
predictions (Iterable):
139+
iterable of annotation rows
140+
project_id (str): id of the project to import into
141+
priority (int): priority of the job
142+
Returns:
143+
(MEAPredictionImport, Batch, MEAToMALPredictionImport)
144+
"""
145+
kwargs = dict(client=self.client, model_run_id=self.uid, name=name)
146+
project = self.client.get_project(project_id)
147+
import_job = self.add_predictions(name, predictions)
148+
prediction_statuses = import_job.statuses
149+
mea_to_mal_data_rows_set = set([
150+
row['dataRow']['id']
151+
for row in prediction_statuses
152+
if row['status'] == 'SUCCESS'
153+
])
154+
mea_to_mal_data_rows = list(
155+
mea_to_mal_data_rows_set)[:DATAROWS_IMPORT_LIMIT]
156+
157+
if len(mea_to_mal_data_rows) >= DATAROWS_IMPORT_LIMIT:
158+
159+
logger.warning(
160+
f"Got {len(mea_to_mal_data_rows_set)} data rows to import, trimmed down to {DATAROWS_IMPORT_LIMIT} data rows"
161+
)
162+
if len(mea_to_mal_data_rows) == 0:
163+
return import_job, None, None
164+
165+
try:
166+
batch = project.create_batch(name, mea_to_mal_data_rows, priority)
167+
try:
168+
mal_prediction_import = Entity.MEAToMALPredictionImport.create_for_model_run_data_rows(
169+
data_row_ids=mea_to_mal_data_rows,
170+
project_id=project_id,
171+
**kwargs)
172+
return import_job, batch, mal_prediction_import
173+
except:
174+
return import_job, batch, None
175+
except:
176+
return import_job, None, None
177+
126178
def add_predictions(
127179
self,
128180
name: str,
@@ -264,11 +316,11 @@ def update_status(self,
264316

265317
@experimental
266318
def update_config(self, config: Dict[str, Any]) -> Dict[str, Any]:
267-
"""
319+
"""
268320
Updates the Model Run's training metadata config
269-
Args:
321+
Args:
270322
config (dict): A dictionary of keys and values
271-
Returns:
323+
Returns:
272324
Model Run id and updated training metadata
273325
"""
274326
data: Dict[str, Any] = {'config': config}
@@ -285,9 +337,9 @@ def update_config(self, config: Dict[str, Any]) -> Dict[str, Any]:
285337

286338
@experimental
287339
def reset_config(self) -> Dict[str, Any]:
288-
"""
340+
"""
289341
Resets Model Run's training metadata config
290-
Returns:
342+
Returns:
291343
Model Run id and reset training metadata
292344
"""
293345
res = self.client.execute(
@@ -300,10 +352,10 @@ def reset_config(self) -> Dict[str, Any]:
300352

301353
@experimental
302354
def get_config(self) -> Dict[str, Any]:
303-
"""
304-
Gets Model Run's training metadata
305-
Returns:
306-
training metadata as a dictionary
355+
"""
356+
Gets Model Run's training metadata
357+
Returns:
358+
training metadata as a dictionary
307359
"""
308360
res = self.client.execute("""query ModelRunPyApi($modelRunId: ID!){
309361
modelRun(where: {id : $modelRunId}){trainingMetadata}

tests/integration/annotation_import/conftest.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,18 @@ def configured_project_pdf(client, ontology, rand_gen, pdf_url):
157157
dataset.delete()
158158

159159

160+
@pytest.fixture
def configured_project_without_data_rows(client, configured_project, rand_gen):
    """Batch-mode project sharing `configured_project`'s ontology but with no
    attached data rows; deleted on teardown."""
    project = client.create_project(name=rand_gen(str))
    # NOTE: the original fixture also fetched the "editor" labeling frontend
    # into an unused local; that dead API call is removed here.
    project.setup_editor(configured_project.ontology())
    project.update(queue_mode=project.QueueMode.Batch)
    yield project
    project.delete()
170+
171+
160172
@pytest.fixture
161173
def prediction_id_mapping(configured_project):
162174
#Maps tool types to feature schema ids
@@ -422,6 +434,7 @@ def model_run_with_model_run_data_rows(client, configured_project,
422434
model_run.upsert_labels(label_ids)
423435
time.sleep(3)
424436
yield model_run
437+
model_run.delete()
425438
# TODO: Delete resources when that is possible ..
426439

427440

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
import uuid
2+
import ndjson
3+
import pytest
4+
5+
from labelbox.schema.annotation_import import AnnotationImportState, MEAPredictionImport
6+
"""
7+
- Here we only want to check that the uploads are calling the validation
8+
- Then with unit tests we can check the types of errors raised
9+
10+
"""
11+
12+
13+
def test_create_from_url(client, tmp_path, object_predictions,
                         model_run_with_model_run_data_rows,
                         configured_project_without_data_rows,
                         annotation_import_test_helpers):
    """Upsert predictions from a signed URL and send them to a project."""
    job_name = str(uuid.uuid4())
    ndjson_name = f"{job_name}.json"
    ndjson_path = tmp_path / ndjson_name

    model_run = model_run_with_model_run_data_rows
    project = configured_project_without_data_rows

    # Keep only predictions whose data rows belong to this model run.
    valid_row_ids = {
        mrdr.data_row().uid for mrdr in model_run.model_run_data_rows()
    }
    relevant_predictions = [
        pred for pred in object_predictions
        if pred['dataRow']['id'] in valid_row_ids
    ]
    with ndjson_path.open("w") as out:
        ndjson.dump(relevant_predictions, out)

    # The uploaded payload needs to carry data row ids.
    with open(ndjson_path, "r") as src:
        url = client.upload_data(content=src.read(),
                                 filename=ndjson_name,
                                 sign=True,
                                 content_type="application/json")

    annotation_import, batch, mal_prediction_import = (
        model_run.upsert_predictions_and_send_to_project(
            name=job_name,
            predictions=url,
            project_id=project.uid,
            priority=5))

    assert annotation_import.model_run_id == model_run.uid
    annotation_import.wait_until_done()
    assert not annotation_import.errors
    assert annotation_import.statuses

    assert batch
    assert batch.project().uid == project.uid

    assert mal_prediction_import
    mal_prediction_import.wait_until_done()
    assert not mal_prediction_import.errors
    assert mal_prediction_import.statuses
59+
60+
61+
def test_create_from_objects(model_run_with_model_run_data_rows,
                             configured_project_without_data_rows,
                             object_predictions,
                             annotation_import_test_helpers):
    """Upsert predictions passed as in-memory objects and send to a project."""
    job_name = str(uuid.uuid4())
    model_run = model_run_with_model_run_data_rows
    project = configured_project_without_data_rows

    # Keep only predictions whose data rows belong to this model run.
    valid_row_ids = {
        mrdr.data_row().uid for mrdr in model_run.model_run_data_rows()
    }
    relevant_predictions = [
        pred for pred in object_predictions
        if pred['dataRow']['id'] in valid_row_ids
    ]

    annotation_import, batch, mal_prediction_import = (
        model_run.upsert_predictions_and_send_to_project(
            name=job_name,
            predictions=relevant_predictions,
            project_id=project.uid,
            priority=5))

    assert annotation_import.model_run_id == model_run.uid
    annotation_import.wait_until_done()
    assert not annotation_import.errors
    assert annotation_import.statuses

    assert batch
    assert batch.project().uid == project.uid

    assert mal_prediction_import
    mal_prediction_import.wait_until_done()
    assert not mal_prediction_import.errors
    assert mal_prediction_import.statuses
93+
94+
95+
def test_create_from_local_file(tmp_path, model_run_with_model_run_data_rows,
                                configured_project_without_data_rows,
                                object_predictions,
                                annotation_import_test_helpers):
    """Upsert predictions from a local ndjson file and send to a project."""
    job_name = str(uuid.uuid4())
    ndjson_path = tmp_path / f"{job_name}.ndjson"

    model_run = model_run_with_model_run_data_rows
    project = configured_project_without_data_rows

    # Keep only predictions whose data rows belong to this model run.
    valid_row_ids = {
        mrdr.data_row().uid for mrdr in model_run.model_run_data_rows()
    }
    relevant_predictions = [
        pred for pred in object_predictions
        if pred['dataRow']['id'] in valid_row_ids
    ]
    with ndjson_path.open("w") as out:
        ndjson.dump(relevant_predictions, out)

    annotation_import, batch, mal_prediction_import = (
        model_run.upsert_predictions_and_send_to_project(
            name=job_name,
            predictions=str(ndjson_path),
            project_id=project.uid,
            priority=5))

    assert annotation_import.model_run_id == model_run.uid
    annotation_import.wait_until_done()
    assert not annotation_import.errors
    assert annotation_import.statuses

    assert batch
    assert batch.project().uid == project.uid

    assert mal_prediction_import
    mal_prediction_import.wait_until_done()
    assert not mal_prediction_import.errors
    assert mal_prediction_import.statuses

0 commit comments

Comments
 (0)