add missing type annotations

Matt Sokoloff · Matt Sokoloff · commit cbc228fc84ab · 2021-07-20T22:04:26.000-04:00
diff --git a/labelbox/data/annotation_types/data/raster.py b/labelbox/data/annotation_types/data/raster.py
@@ -118,7 +118,9 @@ def validate_args(cls, values):
         return values
 
     class Config:
-        # TODO: Create a type for numpy arrays
+        # Required so pydantic accepts numpy array fields (not a pydantic-native type)
         arbitrary_types_allowed = True
+        # Required for sharing references (models are not copied during validation)
         copy_on_model_validation = False
+        # Rejects unknown fields; required for discriminating between data types
         extra = 'forbid'
diff --git a/labelbox/data/annotation_types/data/text.py b/labelbox/data/annotation_types/data/text.py
@@ -71,4 +71,5 @@ def validate_date(cls, values):
         return values
 
     class config:
+        # Required for discriminating between data types. NOTE(review): pydantic reads `Config`, not `config` - confirm this is applied
         extra = 'forbid'
diff --git a/labelbox/data/annotation_types/data/video.py b/labelbox/data/annotation_types/data/video.py
@@ -56,7 +56,6 @@ def frame_generator(
             logger.info("Downloading the video locally to %s", file_path)
             urllib.request.urlretrieve(self.url, file_path)
             self.file_path = file_path
-            # TODO: If the filepath exists but there was no data we should use the url (and the opposite too)
 
         vidcap = cv2.VideoCapture(self.file_path)
 
@@ -135,6 +134,7 @@ def validate_data(cls, values):
         return values
 
     class Config:
-        # TODO: Create numpy array type
+        # Required so pydantic accepts numpy array fields (not a pydantic-native type)
         arbitrary_types_allowed = True
+        # Required for discriminating between data types
         extra = 'forbid'
diff --git a/labelbox/data/annotation_types/geometry/polygon.py b/labelbox/data/annotation_types/geometry/polygon.py
@@ -1,9 +1,9 @@
-from typing import Any, Dict, List
+from typing import List
 
 import numpy as np
 import geojson
 import cv2
-from pydantic import ValidationError, validator
+from pydantic import validator
 
 from .point import Point
 from .geometry import Geometry
@@ -36,7 +36,7 @@ def raster(self, height: int, width: int, color=255) -> np.ndarray:
     @validator('points')
     def is_geom_valid(cls, points):
         if len(points) < 3:
-            raise ValidationError(
+            raise ValueError(
                 f"A polygon must have at least 3 points to be valid. Found {points}"
             )
         return points
diff --git a/labelbox/data/annotation_types/geometry/rectangle.py b/labelbox/data/annotation_types/geometry/rectangle.py
@@ -1,8 +1,7 @@
-from typing import Any, Dict
-
 import cv2
 import geojson
 import numpy as np
+
 from .geometry import Geometry
 from .point import Point
 
@@ -38,5 +37,3 @@ def raster(self, height: int, width: int, color: int = 255) -> np.ndarray:
         canvas = np.zeros((height, width), dtype=np.uint8)
         pts = np.array(self.geometry['coordinates']).astype(np.int32)
         return cv2.fillPoly(canvas, pts=pts, color=color)
-
-    # TODO: Validate the start points are less than the end points
diff --git a/labelbox/data/annotation_types/ner.py b/labelbox/data/annotation_types/ner.py
@@ -1,6 +1,6 @@
 from typing import Dict, Any
 
-from pydantic import BaseModel, root_validator, ValidationError
+from pydantic import BaseModel, root_validator
 
 
 class TextEntity(BaseModel):
diff --git a/labelbox/data/serialization/labelbox_v1/classification.py b/labelbox/data/serialization/labelbox_v1/classification.py
@@ -98,8 +98,9 @@ def to_common(self):
         return classifications
 
     @classmethod
-    def from_common(cls,
-                    annotations: List[AnnotationType]) -> "LBV1Classifications":
+    def from_common(
+            cls, annotations: List[ClassificationAnnotation]
+    ) -> "LBV1Classifications":
         classifications = []
         for annotation in annotations:
             classification = classification_mapping.get(type(annotation.value))
diff --git a/labelbox/data/serialization/labelbox_v1/converter.py b/labelbox/data/serialization/labelbox_v1/converter.py
@@ -1,4 +1,4 @@
-from typing import Any, Callable, Dict, Generator, Iterable, Optional
+from typing import Any, Callable, Dict, Generator, Iterable
 import logging
 
 import ndjson
diff --git a/labelbox/data/serialization/labelbox_v1/label.py b/labelbox/data/serialization/labelbox_v1/label.py
@@ -39,7 +39,6 @@ def to_common(self):
                 # Labelbox doesn't support subclasses on image level classifications
                 # These are added to top level classifications
                 classifications=[],
-                #keyframe = classification.keyframe,
                 frame=self.frame_number,
                 name=classification.title)
             for classification in self.classifications
@@ -58,7 +57,7 @@ def to_common(self):
                             'feature_id': cls.feature_id,
                             'title': cls.title,
                             'value': cls.value,
-                            'keyframe': getattr(cls, 'keyframe', None)
+                            #'keyframe': getattr(cls, 'keyframe', None)
                         }) for cls in obj.classifications
                 ],
                 name=obj.title,
@@ -111,76 +110,63 @@ class LBV1Label(BaseModel):
     data_row_id: str = Field(..., alias="DataRow ID")
     row_data: str = Field(..., alias="Labeled Data")
     external_id: Optional[str] = Field(None, alias="External ID")
-    created_by: Optional[str] = Field(None, alias='Created By')
-
-    id: Optional[str] = Field(None, alias='ID')
-    project_name: Optional[str] = Field(None, alias='Project Name')
-    created_at: Optional[str] = Field(None, alias='Created At')
-    updated_at: Optional[str] = Field(None, alias='Updated At')
-    seconds_to_label: Optional[float] = Field(None, alias='Seconds to Label')
-    agreement: Optional[float] = Field(None, alias='Agreement')
-    benchmark_agreement: Optional[float] = Field(None,
-                                                 alias='Benchmark Agreement')
-    benchmark_id: Optional[float] = Field(None, alias='Benchmark ID')
-    dataset_name: Optional[str] = Field(None, alias='Dataset Name')
-    reviews: Optional[List[Review]] = Field(None, alias='Reviews')
-    label_url: Optional[str] = Field(None, alias='View Label')
-    has_open_issues: Optional[float] = Field(None, alias='Has Open Issues')
-    skipped: Optional[bool] = Field(None, alias='Skipped')
-
-    def construct_data_ref(self, is_video):
-        # TODO: Let users specify the type ...
-        keys = {'external_id': self.external_id, 'uid': self.data_row_id}
 
-        if is_video:
-            return VideoData(url=self.row_data, **keys)
-        if any([x in self.row_data for x in (".jpg", ".png", ".jpeg")
-               ]) and self.row_data.startswith(("http://", "https://")):
-            return RasterData(url=self.row_data, **keys)
-        elif any([x in self.row_data for x in (".txt", ".text", ".html")
-                 ]) and self.row_data.startswith(("http://", "https://")):
-            return TextData(url=self.row_data, **keys)
-        elif isinstance(self.row_data, str):
-            return TextData(text=self.row_data, **keys)
-        elif len([
-                annotation for annotation in self.label.objects
-                if isinstance(annotation, TextEntity)
-        ]):
-            return TextData(url=self.row_data, **keys)
-        else:
-            raise TypeError("Can't infer data type from row data.")
+    created_by: Optional[str] = Field(None,
+                                      alias='Created By',
+                                      extra_field=True)
+    project_name: Optional[str] = Field(None,
+                                        alias='Project Name',
+                                        extra_field=True)
+    id: Optional[str] = Field(None, alias='ID', extra_field=True)
+    created_at: Optional[str] = Field(None,
+                                      alias='Created At',
+                                      extra_field=True)
+    updated_at: Optional[str] = Field(None,
+                                      alias='Updated At',
+                                      extra_field=True)
+    seconds_to_label: Optional[float] = Field(None,
+                                              alias='Seconds to Label',
+                                              extra_field=True)
+    agreement: Optional[float] = Field(None,
+                                       alias='Agreement',
+                                       extra_field=True)
+    benchmark_agreement: Optional[float] = Field(None,
+                                                 alias='Benchmark Agreement',
+                                                 extra_field=True)
+    benchmark_id: Optional[float] = Field(None,
+                                          alias='Benchmark ID',
+                                          extra_field=True)
+    dataset_name: Optional[str] = Field(None,
+                                        alias='Dataset Name',
+                                        extra_field=True)
+    reviews: Optional[List[Review]] = Field(None,
+                                            alias='Reviews',
+                                            extra_field=True)
+    label_url: Optional[str] = Field(None, alias='View Label', extra_field=True)
+    has_open_issues: Optional[float] = Field(None,
+                                             alias='Has Open Issues',
+                                             extra_field=True)
+    skipped: Optional[bool] = Field(None, alias='Skipped', extra_field=True)
 
     def to_common(self) -> Label:
-        is_video = False
         if isinstance(self.label, list):
             annotations = []
             for lbl in self.label:
                 annotations.extend(lbl.to_common())
-            is_video = True
+            data = VideoData(url=self.row_data,
+                             external_id=self.external_id,
+                             uid=self.data_row_id)
         else:
             annotations = self.label.to_common()
+            data = self._infer_media_type()
 
-        return Label(
-            data=self.construct_data_ref(is_video),
-            annotations=annotations,
-            extra={
-                'Created By': self.created_by,
-                'Project Name': self.project_name,
-                'ID': self.id,
-                'Created At': self.created_at,
-                'Updated At': self.updated_at,
-                'Seconds to Label': self.seconds_to_label,
-                'Agreement': self.agreement,
-                'Benchmark Agreement': self.benchmark_agreement,
-                'Benchmark ID': self.benchmark_id,
-                'Dataset Name': self.dataset_name,
-                'Reviews': [
-                    review.dict(by_alias=True) for review in self.reviews
-                ],
-                'View Label': self.label_url,
-                'Has Open Issues': self.has_open_issues,
-                'Skipped': self.skipped
-            })
+        return Label(data=data,
+                     annotations=annotations,
+                     extra={
+                         field.alias: getattr(self, field_name)
+                         for field_name, field in self.__fields__.items()
+                         if field.field_info.extra.get('extra_field')
+                     })
 
     @classmethod
     def from_common(cls, label: Label, signer: Callable[[bytes], str]):
@@ -196,5 +182,23 @@ def from_common(cls, label: Label, signer: Callable[[bytes], str]):
                          external_id=label.data.external_id,
                          **label.extra)
 
+    def _infer_media_type(self):
+        keys = {'external_id': self.external_id, 'uid': self.data_row_id}
+        if any([x in self.row_data for x in (".jpg", ".png", ".jpeg")
+               ]) and self.row_data.startswith(("http://", "https://")):
+            return RasterData(url=self.row_data, **keys)
+        elif any([x in self.row_data for x in (".txt", ".text", ".html")
+                 ]) and self.row_data.startswith(("http://", "https://")):
+            return TextData(url=self.row_data, **keys)
+        elif isinstance(self.row_data, str):
+            return TextData(text=self.row_data, **keys)
+        elif len([
+                annotation for annotation in self.label.objects
+                if isinstance(annotation, TextEntity)
+        ]):
+            return TextData(url=self.row_data, **keys)
+        else:
+            raise TypeError("Can't infer data type from row data.")
+
     class Config:
         allow_population_by_field_name = True
diff --git a/labelbox/data/serialization/labelbox_v1/objects.py b/labelbox/data/serialization/labelbox_v1/objects.py
@@ -2,8 +2,7 @@
 
 from pydantic import BaseModel
 
-from ...annotation_types.annotation import (AnnotationType,
-                                            ClassificationAnnotation,
+from ...annotation_types.annotation import (ClassificationAnnotation,
                                             ObjectAnnotation)
 from ...annotation_types.data import RasterData
 from ...annotation_types.geometry import Line, Mask, Point, Polygon, Rectangle
@@ -174,16 +173,6 @@ def from_common(cls, text_entity: TextEntity,
                    **extra)
 
 
-object_mapping = {
-    Line: LBV1Line,
-    Point: LBV1Point,
-    Polygon: LBV1Polygon,
-    Rectangle: LBV1Rectangle,
-    Mask: LBV1Mask,
-    TextEntity: LBV1TextEntity
-}
-
-
 class LBV1Objects(BaseModel):
     objects: List[Union[LBV1Line, LBV1Point, LBV1Polygon, LBV1Rectangle,
                         LBV1TextEntity, LBV1Mask]]
@@ -215,23 +204,37 @@ def to_common(self) -> List[ObjectAnnotation]:
         return objects
 
     @classmethod
-    def from_common(cls, annotations: List[AnnotationType]) -> "LBV1Objects":
+    def from_common(cls, annotations: List[ObjectAnnotation]) -> "LBV1Objects":
         objects = []
-
         for annotation in annotations:
-            obj = object_mapping.get(type(annotation.value))
-            if obj is not None:
-                subclasses = []
-                subclasses = LBV1Classifications.from_common(
-                    annotation.classifications).classifications
-
-                objects.append(
-                    obj.from_common(
-                        annotation.value, subclasses, annotation.schema_id,
-                        annotation.name, {
-                            'keyframe': getattr(annotation, 'keyframe', None),
-                            **annotation.extra
-                        }))
-            else:
-                raise TypeError(f"Unexpected type {type(annotation.value)}")
+            obj = cls.lookup_object(annotation)
+            subclasses = []
+            subclasses = LBV1Classifications.from_common(
+                annotation.classifications).classifications
+
+            objects.append(
+                obj.from_common(
+                    annotation.value, subclasses, annotation.schema_id,
+                    annotation.name, {
+                        'keyframe': getattr(annotation, 'keyframe', None),
+                        **annotation.extra
+                    }))
         return cls(objects=objects)
+
+    @staticmethod
+    def lookup_object(annotation: ObjectAnnotation) -> "LBV1ObjectType":
+        result = {
+            Line: LBV1Line,
+            Point: LBV1Point,
+            Polygon: LBV1Polygon,
+            Rectangle: LBV1Rectangle,
+            Mask: LBV1Mask,
+            TextEntity: LBV1TextEntity
+        }.get(type(annotation.value))
+        if result is None:
+            raise TypeError(f"Unexpected type {type(annotation.value)}")
+        return result
+
+
+LBV1ObjectType = Union[LBV1Line, LBV1Point, LBV1Polygon, LBV1Rectangle,
+                       LBV1Mask, LBV1TextEntity]
diff --git a/labelbox/data/serialization/ndjson/classification.py b/labelbox/data/serialization/ndjson/classification.py
diff --git a/labelbox/data/serialization/ndjson/converter.py b/labelbox/data/serialization/ndjson/converter.py
diff --git a/labelbox/data/serialization/ndjson/label.py b/labelbox/data/serialization/ndjson/label.py
diff --git a/labelbox/data/serialization/ndjson/objects.py b/labelbox/data/serialization/ndjson/objects.py

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-from typing import Any, Callable, Dict, Generator, Iterable, Optional`
	`1`	`+from typing import Any, Callable, Dict, Generator, Iterable`
`2`	`2`	`import logging`
`3`	`3`
`4`	`4`	`import ndjson`