merge

Matt Sokoloff · Matt Sokoloff · commit c35995840bf9 · 2021-07-25T12:52:28.000-04:00
diff --git a/labelbox/data/annotation_types/__init__.py b/labelbox/data/annotation_types/__init__.py
@@ -24,5 +24,5 @@
 
 from .label import Label
 
-from .collection import LabelCollection
+from .collection import LabelList
 from .collection import LabelGenerator
diff --git a/labelbox/data/annotation_types/annotation.py b/labelbox/data/annotation_types/annotation.py
@@ -15,7 +15,7 @@ class BaseAnnotation(FeatureSchema):
 
 
 class ClassificationAnnotation(BaseAnnotation):
-    """Class represneting classification annotations (annotations that don't have a location) """
+    """Class representing classification annotations (annotations that don't have a location) """
     value: Union[Text, Checklist, Radio, Dropdown]
 
 
diff --git a/labelbox/data/annotation_types/collection.py b/labelbox/data/annotation_types/collection.py
@@ -13,7 +13,7 @@
 logger = logging.getLogger(__name__)
 
 
-class LabelCollection:
+class LabelList:
     """
     A container for interacting with a collection of labels.
     Less memory efficient than LabelGenerator but more performant and convenient to use.
@@ -25,16 +25,15 @@ def __init__(self, data: Iterable[Label]):
         self._index = 0
 
     def assign_schema_ids(
-            self,
-            ontology_builder: "ontology.OntologyBuilder") -> "LabelCollection":
+            self, ontology_builder: "ontology.OntologyBuilder") -> "LabelList":
         """
         Adds schema ids to all FeatureSchema objects in the Labels.
         This is necessary for MAL.
 
         Args:
-            ontology_builder: The ontology that matches the feature names assigned to objects in this LabelCollection
+            ontology_builder: The ontology that matches the feature names assigned to objects in this LabelList
         Returns:
-            LabelCollection. useful for chaining these modifying functions
+            LabelList. Useful for chaining these modifying functions.
         """
         for label in self._data:
             label.assign_schema_ids(ontology_builder)
@@ -43,7 +42,7 @@ def assign_schema_ids(
     def add_to_dataset(self,
                        dataset: "Entity.Dataset",
                        signer: Callable[[bytes], str],
-                       max_concurrency=20) -> "LabelCollection":
+                       max_concurrency=20) -> "LabelList":
         """
         Creates data rows from each labels data object and attaches the data to the given dataset.
         Updates the label's data object to have the same external_id and uid as the data row.
@@ -56,7 +55,7 @@ def add_to_dataset(self,
             dataset: labelbox dataset object to add the new data row to
             signer: A function that accepts bytes and returns a signed url.
         Returns:
-            LabelCollection with updated references to new data rows
+            LabelList with updated references to new data rows
         """
         self._ensure_unique_external_ids()
         self.add_url_to_data(signer, max_concurrency=max_concurrency)
@@ -74,9 +73,9 @@ def add_to_dataset(self,
             label.data.uid = data_row_lookup[label.data.external_id]
         return self
 
-    def add_url_to_masks(self, signer, max_concurrency=20) -> "LabelCollection":
+    def add_url_to_masks(self, signer, max_concurrency=20) -> "LabelList":
         """
-        Creates signed urls for all masks in the LabelCollection.
+        Creates signed urls for all masks in the LabelList.
         Multiple masks can reference the same RasterData mask so this makes sure we only upload that url once.
         Only uploads url if one doesn't already exist.
 
@@ -85,15 +84,15 @@ def add_url_to_masks(self, signer, max_concurrency=20) -> "LabelCollection":
             max_concurrency: how many threads to use for uploading.
                 Should be balanced to match the signing services capabilities.
         Returns:
-            LabelCollection with updated references to the new mask urls
+            LabelList with updated references to the new mask urls
         """
         for row in self._apply_threaded(
             [label.add_url_to_masks for label in self._data], max_concurrency,
                 signer):
             ...
         return self
 
-    def add_url_to_data(self, signer, max_concurrency=20) -> "LabelCollection":
+    def add_url_to_data(self, signer, max_concurrency=20) -> "LabelList":
         """
         Creates signed urls for the data
         Only uploads url if one doesn't already exist.
@@ -103,7 +102,7 @@ def add_url_to_data(self, signer, max_concurrency=20) -> "LabelCollection":
             max_concurrency: how many threads to use for uploading.
                 Should be balanced to match the signing services capabilities.
         Returns:
-            LabelCollection with updated references to the new data urls
+            LabelList with updated references to the new data urls
         """
         for row in self._apply_threaded(
             [label.add_url_to_data for label in self._data], max_concurrency,
@@ -123,7 +122,7 @@ def _ensure_unique_external_ids(self) -> None:
                     )
             external_ids.add(label.data.external_id)
 
-    def __iter__(self) -> "LabelCollection":
+    def __iter__(self) -> "LabelList":
         self._index = 0
         return self
 
@@ -156,15 +155,15 @@ class LabelGenerator(PrefetchGenerator):
     A container for interacting with a collection of labels.
 
     Use this class if you have larger data. It is slightly harder to work with
-    than the LabelCollection but will be much more memory efficient.
+    than the LabelList but will be much more memory efficient.
     """
 
     def __init__(self, data: Generator[Label, None, None], *args, **kwargs):
         self._fns = {}
         super().__init__(data, *args, **kwargs)
 
-    def as_collection(self) -> "LabelCollection":
-        return LabelCollection(data=list(self))
+    def as_list(self) -> "LabelList":
+        return LabelList(data=list(self))
 
     def assign_schema_ids(
             self,
@@ -202,7 +201,7 @@ def add_to_dataset(self, dataset: "Entity.Dataset",
         Creates data rows from each labels data object and attaches the data to the given dataset.
         Updates the label's data object to have the same external_id and uid as the data row.
 
-        This is a lot slower than LabelCollection.add_to_dataset but also more memory efficient.
+        This is a lot slower than LabelList.add_to_dataset but also more memory efficient.
 
         Args:
             dataset: labelbox dataset object to add the new data row to
@@ -272,4 +271,4 @@ def __next__(self):
         return self._process(value)
 
 
-LabelContainer = Union[LabelCollection, LabelGenerator]
+LabelCollection = Union[LabelList, LabelGenerator]
diff --git a/labelbox/data/annotation_types/data/video.py b/labelbox/data/annotation_types/data/video.py
@@ -119,8 +119,7 @@ def create_url(self, signer: Callable[[bytes], str]) -> None:
             self.file_path = self.frames_to_video(self.frames)
             self.url = self.create_url(signer)
         else:
-            raise ValueError(
-                "One of url, im_bytes, file_path, numpy must not be None.")
+            raise ValueError("One of url, file_path, frames must not be None.")
         return self.url
 
     def frames_to_video(self,
diff --git a/labelbox/data/annotation_types/feature.py b/labelbox/data/annotation_types/feature.py
@@ -11,7 +11,7 @@ class FeatureSchema(BaseModel):
     Could be a annotation, a subclass, or an option.
     Schema ids might not be known when constructing these objects so both a name and schema id are valid.
 
-    Use `LabelCollection.assign_schema_ids` or `LabelGenerator.assign_schema_ids`
+    Use `LabelList.assign_schema_ids` or `LabelGenerator.assign_schema_ids`
     to retroactively add schema ids by looking them up from the names.
     """
     name: Optional[str] = None
diff --git a/labelbox/data/annotation_types/label.py b/labelbox/data/annotation_types/label.py
@@ -111,7 +111,7 @@ def assign_schema_ids(
         Args:
             ontology_builder: The ontology that matches the feature names assigned to objects in this dataset
         Returns:
-            LabelCollection. useful for chaining these modifying functions
+            Label. Useful for chaining these modifying functions.
         """
         tool_lookup, classification_lookup = self._get_feature_schema_lookup(
             ontology_builder)
diff --git a/labelbox/data/generator.py b/labelbox/data/generator.py
@@ -33,10 +33,7 @@ class PrefetchGenerator:
     Useful for modifying the generator results based on data from a network
     """
 
-    def __init__(self,
-                 data: Iterable[Any],
-                 prefetch_limit=20,
-                 max_concurrency=4):
+    def __init__(self, data: Iterable[Any], prefetch_limit=20, num_executors=4):
         if isinstance(data, (list, tuple)):
             self._data = (r for r in data)
         else:
@@ -47,10 +44,10 @@ def __init__(self,
         self.completed_threads = 0
         # Can only iterate over once it the queue.get hangs forever.
         self.done = False
-        self.max_concurrency = max_concurrency
-        with ThreadPoolExecutor(max_workers=max_concurrency) as executor:
+        self.num_executors = num_executors
+        with ThreadPoolExecutor(max_workers=num_executors) as executor:
             self.futures = [
-                executor.submit(self.fill_queue) for _ in range(max_concurrency)
+                executor.submit(self.fill_queue) for _ in range(num_executors)
             ]
 
     def _process(self, value) -> Any:
@@ -78,7 +75,7 @@ def __next__(self) -> Any:
         value = self.queue.get()
         while value is None:
             self.completed_threads += 1
-            if self.completed_threads == self.max_concurrency:
+            if self.completed_threads == self.num_executors:
                 self.done = True
                 raise StopIteration
             value = self.queue.get()
diff --git a/tests/data/annotation_types/test_collection.py b/tests/data/annotation_types/test_collection.py
@@ -5,7 +5,7 @@
 import pytest
 from labelbox import DataRow
 from labelbox.data.annotation_types.annotation import ObjectAnnotation
-from labelbox.data.annotation_types.collection import (LabelCollection,
+from labelbox.data.annotation_types.collection import (LabelList,
                                                        LabelGenerator)
 from labelbox.data.annotation_types.data.raster import RasterData
 from labelbox.data.annotation_types.geometry.line import Line
@@ -66,7 +66,7 @@ def test_generator(list_of_labels):
 
 def test_conversion(list_of_labels):
     generator = LabelGenerator(list_of_labels)
-    label_collection = generator.as_collection()
+    label_collection = generator.as_list()
     assert len(label_collection) == len(list_of_labels)
     assert [x for x in label_collection] == list_of_labels
 
@@ -87,7 +87,7 @@ def test_adding_schema_ids():
         tools=[Tool(Tool.Type.LINE, name=name, feature_schema_id=schema_id)])
     generator = LabelGenerator([label]).assign_schema_ids(ontology)
     assert next(generator).annotations[0].schema_id == schema_id
-    labels = LabelCollection([label]).assign_schema_ids(ontology)
+    labels = LabelList([label]).assign_schema_ids(ontology)
     assert next(labels).annotations[0].schema_id == schema_id
     assert labels[0].annotations[0].schema_id == schema_id
 
@@ -106,7 +106,7 @@ def test_adding_urls(signer):
                                                         3)).astype(np.uint8)),
                   annotations=[])
     assert label.data.url != uuid
-    labels = LabelCollection([label]).add_url_to_data(signer(uuid))
+    labels = LabelList([label]).add_url_to_data(signer(uuid))
     assert label.data.url == uuid
     assert next(labels).data.url == uuid
     assert labels[0].data.url == uuid
@@ -133,7 +133,7 @@ def test_adding_to_dataset(signer):
     assert label.data.url != uuid
     assert label.data.external_id == None
     assert label.data.uid != dataset.uid
-    labels = LabelCollection([label]).add_to_dataset(dataset, signer(uuid))
+    labels = LabelList([label]).add_to_dataset(dataset, signer(uuid))
     assert label.data.url == uuid
     assert label.data.external_id != None
     assert label.data.uid == dataset.uid
@@ -169,6 +169,6 @@ def test_adding_to_masks(signer):
                                         color=[255, 255, 255]))
         ])
     assert label.annotations[0].value.mask.url != uuid
-    labels = LabelCollection([label]).add_url_to_masks(signer(uuid))
+    labels = LabelList([label]).add_url_to_masks(signer(uuid))
     assert next(labels).annotations[0].value.mask.url == uuid
     assert labels[0].annotations[0].value.mask.url == uuid