
Commit b87860a

Author: Kevin Kim
Merge branch 'develop' of https://github.com/Labelbox/labelbox-python into kkim/AL-5172
2 parents: 63d30eb + 8ffa9da

File tree: 13 files changed (+240, -31 lines)


CHANGELOG.md

Lines changed: 29 additions & 4 deletions
@@ -1,9 +1,35 @@
 # Changelog
 
-# Version 3.40.0 (YYYY-MM-DD)
+# Version 3.40.1 (2023-03-10)
 
-## Added
-* Insert newest changelogs here
+## Fixed
+* Fixed issue where calling create_batch() on exported data rows wasn't working
+
+# Version 3.40.0 (2023-03-10)
+
+## Added
+* Support Global keys to reference data rows in `Project.create_batch()`, `ModelRun.assign_data_rows_to_split()`
+* Support upserting labels via project_id in `model_run.upsert_labels()`
+* `media_type_override` param to export_v2
+* `last_activity_at` and `label_created_at` params to export_v2
+* New client method `is_feature_schema_archived()`
+* New client method `unarchive_feature_schema_node()`
+* New client method `delete_feature_schema_from_ontology()`
+
+## Changed
+* Removed default task names for export_v2
+
+## Fixed
+* process_label() for COCO panoptic dataset
+
+## Notebooks
+* Updated `annotation_import/pdf.ipynb` with more examples
+* Added `integrations/huggingface/huggingface.ipynb`
+* Fixed broken links for detectron notebooks in README
+* Added Dataset QueueMode during project creation in `integrations/detectron2/coco_object.ipynb`
+* Removed metadata and updated ontology in `annotation_import/text.ipynb`
+* Removed confidence scores in `annotation_import/image.ipynb`
+* Updated custom embedding tutorial links in `basics/data_row_metadata.ipynb`
 
 # Version 3.39.0 (2023-02-28)
 ## Added
@@ -72,7 +98,6 @@
 * Added support for adding metadata by name when creating datarows using `Dataset.create_data_rows()`, `Dataset.create_data_rows_sync()`, and `Dataset.create_data_row()`.
 * Example notebooks for auto metrics in models
 
-
 ### Changed
 * `Dataset.create_data_rows()` max limit of DataRows increased to 150,000
 * Improved error handling for invalid annotation import content

CONTRIB.md

Lines changed: 17 additions & 1 deletion
@@ -63,4 +63,20 @@ Each release should follow the following steps:
 6. This will kick off a Github Actions workflow that will:
    - Build the library in the [standard way](https://packaging.python.org/tutorials/packaging-projects/#generating-distribution-archives)
    - Upload the distribution archives in the [standard way](https://packaging.python.org/tutorials/packaging-projects/#uploading-the-distribution-archives)
-     with credentials for the `labelbox` PyPI user.
+   - with credentials for the `labelbox` PyPI user.
+
+## Running Jupyter Notebooks
+
+We have plenty of good samples in the _examples_ directory, and using them for testing can help increase our productivity. One way to use Jupyter notebooks is to run the Jupyter server locally (another is to use a VS Code plugin, not documented here); it is fast.
+
+Make sure your notebook will use your source code:
+1. `ipython profile create`
+2. `ipython locate` - shows where the config file is. This is the config file used by the Jupyter server, since it runs via ipython.
+3. Open the file (it should be ipython_config.py, usually located in ~/.ipython/profile_default) and add the following line of code:
+```
+c.InteractiveShellApp.exec_lines = [
+    'import sys; sys.path.insert(0, "<labelbox-python root folder>")'
+]
+```
+4. Go to the root of your project and run `jupyter notebook` to start the server
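After step 4, one quick way to confirm the notebook kernel is importing your local checkout rather than an installed copy (a suggested check, not part of the committed docs):

```
# Run in a notebook cell; the path should point inside your labelbox-python clone.
import labelbox
print(labelbox.__file__)
```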

docs/source/conf.py

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@
 copyright = '2021, Labelbox'
 author = 'Labelbox'
 
-release = '3.39.0'
+release = '3.40.1'
 
 # -- General configuration ---------------------------------------------------

labelbox/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 name = "labelbox"
-__version__ = "3.39.0"
+__version__ = "3.40.1"
 
 from labelbox.client import Client
 from labelbox.schema.project import Project

labelbox/client.py

Lines changed: 71 additions & 5 deletions
@@ -36,6 +36,7 @@
 from labelbox.schema.role import Role
 from labelbox.schema.slice import CatalogSlice, ModelSlice
 from labelbox.schema.queue_mode import QueueMode
+from labelbox.schema.ontology import Ontology, DeleteFeatureFromOntologyResult
 
 from labelbox.schema.media_type import MediaType, get_media_type_validation_error
 
@@ -44,11 +45,6 @@
 _LABELBOX_API_KEY = "LABELBOX_API_KEY"
 
 
-class DeleteFeatureFromOntologyResult:
-    archived: bool
-    deleted: bool
-
-
 class Client:
     """ A Labelbox client.
 
@@ -1578,6 +1574,49 @@ def get_catalog_slice(self, slice_id) -> CatalogSlice:
         res = self.execute(query_str, {'id': slice_id})
         return Entity.CatalogSlice(self, res['getSavedQuery'])
 
+    def is_feature_schema_archived(self, ontology_id: str,
+                                   feature_schema_id: str) -> bool:
+        """
+        Returns true if a feature schema is archived in the specified ontology, returns false otherwise.
+
+        Args:
+            feature_schema_id (str): The ID of the feature schema
+            ontology_id (str): The ID of the ontology
+        Returns:
+            bool
+        """
+
+        ontology_endpoint = self.rest_endpoint + "/ontologies/" + urllib.parse.quote(
+            ontology_id)
+        response = requests.get(
+            ontology_endpoint,
+            headers=self.headers,
+        )
+
+        if response.status_code == requests.codes.ok:
+            feature_schema_nodes = response.json()['featureSchemaNodes']
+            tools = feature_schema_nodes['tools']
+            classifications = feature_schema_nodes['classifications']
+            relationships = feature_schema_nodes['relationships']
+            feature_schema_node_list = tools + classifications + relationships
+            filtered_feature_schema_nodes = [
+                feature_schema_node
+                for feature_schema_node in feature_schema_node_list
+                if feature_schema_node['featureSchemaId'] == feature_schema_id
+            ]
+            if filtered_feature_schema_nodes:
+                return bool(filtered_feature_schema_nodes[0]['archived'])
+            else:
+                raise labelbox.exceptions.LabelboxError(
+                    "The specified feature schema was not in the ontology.")
+
+        elif response.status_code == 404:
+            raise labelbox.exceptions.ResourceNotFoundError(
+                Ontology, ontology_id)
+        else:
+            raise labelbox.exceptions.LabelboxError(
+                "Failed to get the feature schema archived status.")
+
     def get_model_slice(self, slice_id) -> ModelSlice:
         """
         Fetches a Model Slice by ID.
@@ -1646,3 +1685,30 @@ def delete_feature_schema_from_ontology(
         raise labelbox.exceptions.LabelboxError(
             "Failed to remove feature schema from ontology, message: " +
             str(response.json()['message']))
+
+    def unarchive_feature_schema_node(self, ontology_id: str,
+                                      root_feature_schema_id: str) -> None:
+        """
+        Unarchives a feature schema node in an ontology.
+        Only root level feature schema nodes can be unarchived.
+        Args:
+            ontology_id (str): The ID of the ontology
+            root_feature_schema_id (str): The ID of the root level feature schema
+        Returns:
+            None
+        """
+        ontology_endpoint = self.rest_endpoint + "/ontologies/" + urllib.parse.quote(
+            ontology_id) + '/feature-schemas/' + urllib.parse.quote(
+                root_feature_schema_id) + '/unarchive'
+        response = requests.patch(
+            ontology_endpoint,
+            headers=self.headers,
+        )
+        if response.status_code == requests.codes.ok:
+            if not bool(response.json()['unarchived']):
+                raise labelbox.exceptions.LabelboxError(
+                    "Failed to unarchive the feature schema.")
+        else:
+            raise labelbox.exceptions.LabelboxError(
+                "Failed to unarchive the feature schema node, message: " +
+                response.text)
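A minimal usage sketch of the new feature-schema client methods. The ontology and feature schema IDs are placeholders, and the argument order for `delete_feature_schema_from_ontology()` is assumed by analogy with the two methods above:

```
import labelbox

client = labelbox.Client(api_key="<YOUR_API_KEY>")

# New in 3.40.0: check whether a feature schema is archived in an ontology.
if client.is_feature_schema_archived("<ONTOLOGY_ID>", "<FEATURE_SCHEMA_ID>"):
    # Only root-level feature schema nodes can be unarchived.
    client.unarchive_feature_schema_node("<ONTOLOGY_ID>", "<FEATURE_SCHEMA_ID>")

# Returns a DeleteFeatureFromOntologyResult exposing `archived` and `deleted`
# flags indicating which action was taken (argument order assumed).
result = client.delete_feature_schema_from_ontology("<ONTOLOGY_ID>",
                                                    "<FEATURE_SCHEMA_ID>")
print(result.archived, result.deleted)
```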

labelbox/schema/export_params.py

Lines changed: 3 additions & 0 deletions
@@ -1,6 +1,8 @@
 import sys
 
 from typing import Optional
+
+from labelbox.schema.media_type import MediaType
 if sys.version_info >= (3, 8):
     from typing import TypedDict
 else:
@@ -11,6 +13,7 @@ class DataRowParams(TypedDict):
     data_row_details: Optional[bool]
     metadata_fields: Optional[bool]
     attachments: Optional[bool]
+    media_type_override: Optional[MediaType]
 
 
 class ProjectExportParams(DataRowParams):
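Because these are TypedDicts, `media_type_override` is just another dict key. A sketch of building export params with the override; the remaining field names are taken from the project.py defaults later in this commit, and `MediaType.Video` is assumed to be a valid member:

```
from labelbox.schema.export_params import ProjectExportParams
from labelbox.schema.media_type import MediaType

params: ProjectExportParams = {
    "attachments": False,
    "metadata_fields": True,
    "data_row_details": True,
    "project_details": False,
    "performance_details": False,
    "label_details": True,
    "media_type_override": MediaType.Video,  # None leaves the media type untouched
}
```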

labelbox/schema/model_run.py

Lines changed: 8 additions & 4 deletions
@@ -335,14 +335,15 @@ def delete_model_run_data_rows(self, data_row_ids: List[str]):
 
     @experimental
     def assign_data_rows_to_split(self,
-                                  data_row_ids: List[str],
-                                  split: Union[DataSplit, str],
+                                  data_row_ids: List[str] = None,
+                                  split: Union[DataSplit, str] = None,
+                                  global_keys: List[str] = None,
                                   timeout_seconds=120):
 
         split_value = split.value if isinstance(split, DataSplit) else split
         valid_splits = DataSplit._member_names_
 
-        if split_value not in valid_splits:
+        if split_value is None or split_value not in valid_splits:
             raise ValueError(
                 f"`split` must be one of : `{valid_splits}`. Found : `{split}`")
 
@@ -354,7 +355,8 @@
             'data': {
                 'assignments': [{
                     'split': split_value,
-                    'dataRowIds': data_row_ids
+                    'dataRowIds': data_row_ids,
+                    'globalKeys': global_keys,
                 }]
             }
         },
@@ -521,6 +523,8 @@ def export_v2(self,
                 "modelRunId": self.uid
             },
             "params": {
+                "mediaTypeOverride":
+                    _params.get('media_type_override', None),
                 "includeAttachments":
                     _params.get('attachments', False),
                 "includeMetadata":

labelbox/schema/ontology.py

Lines changed: 10 additions & 0 deletions
@@ -11,11 +11,21 @@
 from labelbox.exceptions import InconsistentOntologyException
 from labelbox.orm.db_object import DbObject
 from labelbox.orm.model import Field, Relationship
+import json
 
 FeatureSchemaId: Type[str] = constr(min_length=25, max_length=25)
 SchemaId: Type[str] = constr(min_length=25, max_length=25)
 
 
+class DeleteFeatureFromOntologyResult:
+    archived: bool
+    deleted: bool
+
+    def __str__(self):
+        return "<%s %s>" % (self.__class__.__name__.split(".")[-1],
+                            json.dumps(self.__dict__))
+
+
 class FeatureSchema(DbObject):
     name = Field.String("name")
     color = Field.String("name")
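With the `__str__` now defined on the class, a populated result prints as its JSON fields; the values below are illustrative:

```
from labelbox.schema.ontology import DeleteFeatureFromOntologyResult

result = DeleteFeatureFromOntologyResult()
result.archived = True   # normally set by Client.delete_feature_schema_from_ontology()
result.deleted = False
print(result)  # <DeleteFeatureFromOntologyResult {"archived": true, "deleted": false}>
```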

labelbox/schema/project.py

Lines changed: 7 additions & 2 deletions
@@ -430,7 +430,8 @@ def export_v2(self,
             "data_row_details": False,
             "project_details": False,
             "performance_details": False,
-            "label_details": False
+            "label_details": False,
+            "media_type_override": None
         })
 
         _filters = filters or ProjectExportFilters({
@@ -451,6 +452,7 @@ def _get_timezone() -> str:
         """ % (mutation_name)
 
         search_query: List[Dict[str, Collection[str]]] = []
+        media_type_override = _params.get('media_type_override', None)
         query_params = {
             "input": {
                 "taskName": task_name,
@@ -462,6 +464,9 @@
                 }
             },
             "params": {
+                "mediaTypeOverride":
+                    media_type_override.value
+                    if media_type_override is not None else None,
                 "includeAttachments":
                     _params.get('attachments', False),
                 "includeMetadata":
@@ -809,7 +814,7 @@ def create_batch(
                 "`data_rows` must be DataRow ids or DataRow objects")
 
         if data_rows is not None:
-            row_count = len(data_rows)
+            row_count = len(dr_ids)
         elif global_keys is not None:
             row_count = len(global_keys)
         else:
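A sketch tying the project-side changes together: creating a batch from global keys (the 3.40.1 fix makes `row_count` use the resolved `dr_ids`) and exporting with the media-type override. The IDs, batch name, and the `export_v2(params=...)` call shape are assumed from this diff:

```
from labelbox import Client
from labelbox.schema.media_type import MediaType

client = Client(api_key="<YOUR_API_KEY>")
project = client.get_project("<PROJECT_ID>")

# Global keys can now stand in for data row IDs when creating a batch.
batch = project.create_batch(
    name="batch-from-global-keys",
    global_keys=["image-0001", "image-0002"],
)

# media_type_override is passed through as mediaTypeOverride in the export request.
export_task = project.export_v2(params={"media_type_override": MediaType.Image})
export_task.wait_till_done()
```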

scripts/update_sdk_version.sh

Lines changed: 9 additions & 11 deletions
@@ -38,23 +38,21 @@ CHANGELOGS_FILE="$SDK_PATH/CHANGELOG.md"
 
 old_version=$(cat $SDK_PATH/labelbox/__init__.py | grep __version__ | cut -d '=' -f2 | tr -d ' ' | tr -d '"')
 
-echo "New version: $new_version"
-echo "Old version: $old_version"
-
+printf "Starting release process! $old_version --> $new_version\n"
 escaped_old_version=$(echo "$old_version" | sed "s/\./\\\./g")
 escaped_new_version=$(echo "$new_version" | sed "s/\./\\\./g")
 
 sed -i "" "s/$escaped_old_version/$escaped_new_version/" $INIT_FILE
-echo "Updated '$INIT_FILE'"
+printf "Updated '$INIT_FILE'\n"
 
 sed -i "" "s/$escaped_old_version/$escaped_new_version/" $READTHEDOCS_CONF_FILE
-echo "Updated '$READTHEDOCS_CONF_FILE'"
-echo "Successfully updated SDK version locally!"
+printf "Updated '$READTHEDOCS_CONF_FILE'\n"
+printf "Successfully updated SDK version locally!\n"
 
-echo "\nOpening CHANGELOGS file in text editor"
+printf "Opening CHANGELOGS file in text editor\n"
 open -e $CHANGELOGS_FILE
 
-echo "\nPlease open a PR to finish the release process using the following git commands:"
-echo "\ngit add --all"
-echo "git commit -m 'Preparing for $new_version release'"
-echo "git push origin prep_$new_version"
+printf "Please open a PR to finish the release process using the following git commands:\n"
+printf "git add --all\n"
+printf "git commit -m 'Preparing for $new_version release'\n"
+printf "git push origin prep_$new_version\n"
