1515
class LabelCollection:
    """
    A container for interacting with a collection of labels.
    Less memory efficient than LabelGenerator but more performant and convenient to use.
    Use on smaller datasets.
    """

    def __init__(self, data: Iterable[Label]):
        # Backing store for the labels. NOTE(review): despite the Iterable
        # annotation, __len__/__getitem__/__next__ in this class index into
        # _data and call len() on it, so callers must pass an indexable
        # sequence (e.g. a list) — confirm and consider tightening the hint.
        self._data = data
        # Cursor used by the __iter__/__next__ protocol below.
        self._index = 0
26- def __iter__ (self ):
27- self ._index = 0
28- return self
29-
30- def __next__ (self ) -> Label :
31- if self ._index == len (self ._data ):
32- raise StopIteration
33-
34- value = self ._data [self ._index ]
35- self ._index += 1
36- return value
37-
38- def __len__ (self ) -> int :
39- return len (self ._data )
40-
41- def __getitem__ (self , idx : int ) -> Label :
42- return self ._data [idx ]
43-
4427 def assign_schema_ids (
4528 self , ontology_builder : OntologyBuilder ) -> "LabelCollection" :
4629 """
47- Based on an ontology:
48- - Checks to make sure that the feature names exist in the ontology
49- - Updates the names to match the ontology.
30+ Adds schema ids to all FeatureSchema objects in the Labels.
31+ This is necessary for MAL.
32+
33+ Args:
34+ ontology_builder: The ontology that matches the feature names assigned to objects in this LabelCollection
35+ Returns:
36+ LabelCollection. useful for chaining these modifying functions
5037 """
5138 for label in self ._data :
5239 label .assign_schema_ids (ontology_builder )
5340 return self
5441
    def _ensure_unique_external_ids(self) -> None:
        """
        Ensure every label's data object carries a unique external_id before a
        bulk upload; labels with no external_id get a fresh uuid4.

        Raises:
            ValueError: if two labels share the same non-None external_id.
        """
        external_ids = set()
        for label in self._data:
            if label.data.external_id is None:
                # NOTE(review): assigns a uuid.UUID object, not str — confirm
                # downstream consumers accept a UUID here.
                label.data.external_id = uuid4()
            else:
                if label.data.external_id in external_ids:
                    raise ValueError(
                        f"External ids must be unique for bulk uploading. Found {label.data.external_id} more than once."
                    )
                # NOTE(review): auto-generated ids above are never added to
                # this set, so they are not checked for collisions.
                external_ids.add(label.data.external_id)
6742 def add_to_dataset (self ,
68- dataset ,
69- signer ,
43+ dataset : "Entity.Dataset" ,
44+ signer : Callable [[ bytes ], str ] ,
7045 max_concurrency = 20 ) -> "LabelCollection" :
7146 """
72- # It is reccomended to create a new dataset if memory is a concern
73- # Also note that this relies on exported data that it cached.
74- # So this will not work on the same dataset more frequently than every 30 min.
75- # The workaround is creating a new dataset
47+ Creates data rows from each labels data object and attaches the data to the given dataset.
48+ Updates the label's data object to have the same external_id and uid as the data row.
49+ It is reccomended to create a new dataset if memory is a concern because all dataset data rows are exported to make this faster.
50+ Also note that this relies on exported data that it cached.
51+ So this will not work on the same dataset more frequently than every 30 min.
52+ The workaround is creating a new dataset each time this function is used.
53+
54+ Args:
55+ dataset: labelbox dataset object to add the new data row to
56+ signer: A function that accepts bytes and returns a signed url.
57+ Returns:
58+ LabelCollection with updated references to new data rows
7659 """
7760 self ._ensure_unique_external_ids ()
7861 self .add_url_to_data (signer , max_concurrency = max_concurrency )
@@ -92,8 +75,16 @@ def add_to_dataset(self,
9275
9376 def add_url_to_masks (self , signer , max_concurrency = 20 ) -> "LabelCollection" :
9477 """
95- Creates a data row id for each data row that needs it. If the data row exists then it skips the row.
96- TODO: Add error handling..
78+ Creates signed urls for all masks in the LabelCollection.
79+ Multiple masks can reference the same RasterData mask so this makes sure we only upload that url once.
80+ Only uploads url if one doesn't already exist.
81+
82+ Args:
83+ signer: A function that accepts bytes and returns a signed url.
84+ max_concurrency: how many threads to use for uploading.
85+ Should be balanced to match the signing services capabilities.
86+ Returns:
87+ LabelCollection with updated references to the new mask urls
9788 """
9889 for row in self ._apply_threaded (
9990 [label .add_url_to_masks for label in self ._data ], max_concurrency ,
@@ -103,14 +94,52 @@ def add_url_to_masks(self, signer, max_concurrency=20) -> "LabelCollection":
10394
10495 def add_url_to_data (self , signer , max_concurrency = 20 ) -> "LabelCollection" :
10596 """
106- TODO: Add error handling..
97+ Creates signed urls for the data
98+ Only uploads url if one doesn't already exist.
99+
100+ Args:
101+ signer: A function that accepts bytes and returns a signed url.
102+ max_concurrency: how many threads to use for uploading.
103+ Should be balanced to match the signing services capabilities.
104+ Returns:
105+ LabelCollection with updated references to the new data urls
107106 """
108107 for row in self ._apply_threaded (
109108 [label .add_url_to_data for label in self ._data ], max_concurrency ,
110109 signer ):
111110 ...
112111 return self
113112
113+ def _ensure_unique_external_ids (self ) -> None :
114+ external_ids = set ()
115+ for label in self ._data :
116+ if label .data .external_id is None :
117+ label .data .external_id = uuid4 ()
118+ else :
119+ if label .data .external_id in external_ids :
120+ raise ValueError (
121+ f"External ids must be unique for bulk uploading. Found { label .data .external_id } more than once."
122+ )
123+ external_ids .add (label .data .external_id )
124+
    def __iter__(self) -> "LabelCollection":
        # The collection is its own iterator; reset the shared cursor.
        # NOTE(review): nested or concurrent iteration shares this cursor
        # and will interfere with each other.
        self._index = 0
        return self

    def __next__(self) -> Label:
        # Stop once the cursor has walked past the last label.
        if self._index == len(self._data):
            raise StopIteration

        value = self._data[self._index]
        self._index += 1
        return value

    def __len__(self) -> int:
        # Number of labels in the collection.
        return len(self._data)

    def __getitem__(self, idx: int) -> Label:
        # Random access to a label by position, list-style.
        return self._data[idx]
142+
114143 def _apply_threaded (self , fns , max_concurrency , * args ):
115144 futures = []
116145 with ThreadPoolExecutor (max_workers = max_concurrency ) as executor :
@@ -122,6 +151,8 @@ def _apply_threaded(self, fns, max_concurrency, *args):
122151
class LabelGenerator(PrefetchGenerator):
    """
    A container for interacting with a collection of labels.

    Use this class if you have larger data. It is slightly harder to work with
    than the LabelCollection but will be much more memory efficient.
    """
@@ -130,14 +161,6 @@ def __init__(self, data: Generator[Label, None, None], *args, **kwargs):
130161 self ._fns = {}
131162 super ().__init__ (data , * args , ** kwargs )
132163
133- def __iter__ (self ):
134- return self
135-
136- def process (self , value ):
137- for fn in self ._fns .values ():
138- value = fn (value )
139- return value
140-
141164 def as_collection (self ) -> "LabelCollection" :
142165 return LabelCollection (data = list (self ))
143166
@@ -154,8 +177,13 @@ def _assign_ids(label: Label):
154177 def add_url_to_data (self , signer : Callable [[bytes ],
155178 str ]) -> "LabelGenerator" :
156179 """
157- Updates masks to have `url` attribute
158- Doesn't update masks that already have urls
180+ Creates signed urls for the data
181+ Only uploads url if one doesn't already exist.
182+
183+ Args:
184+ signer: A function that accepts bytes and returns a signed url.
185+ Returns:
186+ LabelGenerator that signs urls as data is accessed
159187 """
160188
161189 def _add_url_to_data (label : Label ):
@@ -165,8 +193,20 @@ def _add_url_to_data(label: Label):
165193 self ._fns ['_add_url_to_data' ] = _add_url_to_data
166194 return self
167195
168- def add_to_dataset (self , dataset ,
196+ def add_to_dataset (self , dataset : "Entity.Dataset" ,
169197 signer : Callable [[bytes ], str ]) -> "LabelGenerator" :
198+ """
199+ Creates data rows from each labels data object and attaches the data to the given dataset.
200+ Updates the label's data object to have the same external_id and uid as the data row.
201+
202+ This is a lot slower than LabelCollection.add_to_dataset but also more memory efficient.
203+
204+ Args:
205+ dataset: labelbox dataset object to add the new data row to
206+ signer: A function that accepts bytes and returns a signed url.
207+ Returns:
208+ LabelGenerator that updates references to the new data rows as data is accessed
209+ """
170210
171211 def _add_to_dataset (label : Label ):
172212 label .create_data_row (dataset , signer )
@@ -178,8 +218,16 @@ def _add_to_dataset(label: Label):
178218 def add_url_to_masks (self , signer : Callable [[bytes ],
179219 str ]) -> "LabelGenerator" :
180220 """
181- Updates masks to have `url` attribute
182- Doesn't update masks that already have urls
221+ Creates signed urls for all masks in the LabelGenerator.
222+ Multiple masks can reference the same RasterData mask so this makes sure we only upload that url once.
223+ Only uploads url if one doesn't already exist.
224+
225+ Args:
226+ signer: A function that accepts bytes and returns a signed url.
            (Note: unlike LabelCollection.add_url_to_masks, this method takes
            no max_concurrency argument — urls are signed lazily per label.)
229+ Returns:
230+ LabelGenerator that updates references to the new mask urls as data is accessed
183231 """
184232
185233 def _add_url_to_masks (label : Label ):
@@ -189,14 +237,22 @@ def _add_url_to_masks(label: Label):
189237 self ._fns ['add_url_to_masks' ] = _add_url_to_masks
190238 return self
191239
240+ def __iter__ (self ):
241+ return self
242+
243+ def _process (self , value ):
244+ for fn in self ._fns .values ():
245+ value = fn (value )
246+ return value
247+
192248 def __next__ (self ):
193249 """
194- - Double check that all values have been set.
195- - Items could have been processed before any of these modifying functions are called.
196- - None of these functions do anything if run more than once so the cost is minimal.
250+ Double checks that all values have been set.
251+ Items could have been processed before any of these modifying functions are called.
252+ None of these functions do anything if run more than once so the cost is minimal.
197253 """
198254 value = super ().__next__ ()
199- return self .process (value )
255+ return self ._process (value )
200256
201257
202258LabelData = Union [LabelCollection , LabelGenerator ]
0 commit comments