WIP to objects file

jtsodapop · jtsodapop · commit c42d1515aff4 · 2022-03-30T15:32:18.000-04:00
diff --git a/labelbox/data/serialization/ndjson/label.py b/labelbox/data/serialization/ndjson/label.py
@@ -5,7 +5,7 @@
 
 from pydantic import BaseModel
 
-from ...annotation_types.annotation import ClassificationAnnotation, ObjectAnnotation, VideoClassificationAnnotation
+from ...annotation_types.annotation import ClassificationAnnotation, ObjectAnnotation, VideoClassificationAnnotation, VideoObjectAnnotation
 from ...annotation_types.collection import LabelCollection, LabelGenerator
 from ...annotation_types.data import ImageData, TextData, VideoData
 from ...annotation_types.label import Label
@@ -33,6 +33,7 @@ def to_common(self) -> LabelGenerator:
     def from_common(cls,
                     data: LabelCollection) -> Generator["NDLabel", None, None]:
         for label in data:
+            # Emit each label's non-video annotations first, then its video ones.
             yield from cls._create_non_video_annotations(label)
             yield from cls._create_video_annotations(label)
 
@@ -45,8 +46,10 @@ def _generate_annotations(
         for data_row_id, annotations in grouped_annotations.items():
             annots = []
             for annotation in annotations:
+
                 if isinstance(annotation, NDObjectType.__args__):
                     annots.append(NDObject.to_common(annotation))
+                # TODO: decide whether each to_common() result should be appended or extended
                 elif isinstance(annotation, NDClassificationType.__args__):
                     annots.extend(NDClassification.to_common(annotation))
                 elif isinstance(annotation,
@@ -65,7 +68,7 @@ def _infer_media_type(
         types = {type(annotation) for annotation in annotations}
         if TextEntity in types:
             return TextData
-        elif VideoClassificationAnnotation in types:
+        elif VideoClassificationAnnotation in types or VideoObjectAnnotation in types:
             return VideoData
         else:
             return ImageData
@@ -83,26 +86,67 @@ def _get_consecutive_frames(
     def _create_video_annotations(
         cls, label: Label
     ) -> Generator[Union[NDChecklistSubclass, NDRadioSubclass], None, None]:
+        """Yield NDJSON payloads for the video annotations of ``label``.
+
+        Video classification and video object annotations are grouped by
+        ``feature_schema_id``; every group is converted on its own:
+
+        * Classification groups: the first annotation of the group gets
+          ``extra['frames']`` (one ``start``/``end`` entry per frame range
+          returned by ``_get_consecutive_frames``) and goes through
+          ``NDClassification.from_common``.
+        * Object groups: keyframe annotations are bucketed by the frame
+          range they fall in, and the list of buckets goes through
+          ``NDObject.from_common``.
+
+        Args:
+            label: Label whose annotations are scanned for video types.
+
+        Yields:
+            One converted NDJSON object per ``feature_schema_id`` group.
+        """
         video_annotations = defaultdict(list)
         for annot in label.annotations:
-            if isinstance(annot, VideoClassificationAnnotation):
+            if isinstance(
+                    annot,
+                (VideoClassificationAnnotation, VideoObjectAnnotation)):
                 video_annotations[annot.feature_schema_id].append(annot)
 
+        # Dispatch on the first item: a group is assumed to hold one annotation kind.
         for annotation_group in video_annotations.values():
             consecutive_frames = cls._get_consecutive_frames(
                 sorted([annotation.frame for annotation in annotation_group]))
-            annotation = annotation_group[0]
-            frames_data = []
-            for frames in consecutive_frames:
-                frames_data.append({'start': frames[0], 'end': frames[-1]})
-            annotation.extra.update({'frames': frames_data})
-            yield NDClassification.from_common(annotation, label.data)
+
+            if isinstance(annotation_group[0], VideoClassificationAnnotation):
+                annotation = annotation_group[0]
+                frames_data = [{
+                    'start': frames[0],
+                    'end': frames[-1]
+                } for frames in consecutive_frames]
+                annotation.extra.update({'frames': frames_data})
+                yield NDClassification.from_common(annotation, label.data)
+
+            elif isinstance(annotation_group[0], VideoObjectAnnotation):
+                # One bucket per frame range, keeping only the keyframes
+                # whose frame number lies inside that range (inclusive).
+                segments = []
+                for start_frame, end_frame in consecutive_frames:
+                    segment = [
+                        annotation for annotation in annotation_group
+                        if annotation.keyframe and
+                        start_frame <= annotation.frame <= end_frame
+                    ]
+                    segments.append(segment)
+                # NOTE(review): a range without keyframes yields an empty bucket;
+                # confirm NDObject.from_common tolerates that.
+                yield NDObject.from_common(segments, label.data)
 
     @classmethod
     def _create_non_video_annotations(cls, label: Label):
         non_video_annotations = [
             annot for annot in label.annotations
-            if not isinstance(annot, VideoClassificationAnnotation)
+            if not isinstance(annot, (VideoClassificationAnnotation,
+                                      VideoObjectAnnotation))
         ]
         for annotation in non_video_annotations:
             if isinstance(annotation, ClassificationAnnotation):
diff --git a/labelbox/data/serialization/ndjson/objects.py b/labelbox/data/serialization/ndjson/objects.py
@@ -3,15 +3,18 @@
 from typing import Any, Dict, List, Tuple, Union
 import base64
 import numpy as np
+from py import process
 
 from pydantic import BaseModel
 from PIL import Image
 
+from labelbox.data.annotation_types.data.video import VideoData
+
 from ...annotation_types.data import ImageData, TextData, MaskData
 from ...annotation_types.ner import TextEntity
 from ...annotation_types.types import Cuid
 from ...annotation_types.geometry import Rectangle, Polygon, Line, Point, Mask
-from ...annotation_types.annotation import ClassificationAnnotation, ObjectAnnotation
+from ...annotation_types.annotation import ClassificationAnnotation, ObjectAnnotation, VideoObjectAnnotation
 from .classification import NDSubclassification, NDSubclassificationType
 from .base import DataRow, NDAnnotation
 
@@ -118,6 +121,72 @@ def from_common(cls, rectangle: Rectangle,
                    classifications=classifications)
 
 
+class NDFrameRectangle(BaseModel):  # one keyframe of a video bounding box
+    frame: int  # frame number the box applies to
+    bbox: Bbox  # box geometry on that frame
+
+
+class NDSegment(BaseModel):  # one group of keyframes within a video object
+    keyframes: List[NDFrameRectangle]  # frame/bbox entries of this segment
+
+
+class NDSegments(NDBaseObject):
+    """NDJSON payload describing one video object as a list of segments.
+
+    Each segment holds the keyframes (frame number plus bounding box) that
+    belong together; ``from_common`` builds them from grouped video object
+    annotations.
+    """
+    segments: List[NDSegment]
+
+    def to_common(self):
+        """Convert back to common annotation types.
+
+        Not implemented yet: this is a placeholder that returns ``None``.
+        """
+        pass
+
+    @classmethod
+    def from_common(cls, segments: List[List[VideoObjectAnnotation]],
+                    data: VideoData, feature_schema_id: Cuid,
+                    extra: Dict[str, Any]) -> "NDSegments":
+        """Build the payload from keyframe annotations grouped by segment.
+
+        Args:
+            segments: One inner list of keyframe annotations per segment.
+                Every annotation is assumed to carry a ``Rectangle`` in
+                ``value`` (``start``/``end`` corner points) - TODO confirm
+                once other geometries are supported for video.
+            data: Video the annotations belong to; only ``data.uid`` is
+                used, as the data row id.
+            feature_schema_id: Schema id stored on the payload.
+            extra: Extra annotation data; its optional ``'uuid'`` entry
+                becomes the payload uuid (``None`` when absent).
+
+        Returns:
+            The populated ``NDSegments`` instance.
+        """
+        # NDSegment expects frame/bbox keyframes rather than raw annotations,
+        # so reduce every annotation to its frame number and a Bbox derived
+        # from the rectangle corners.
+        nd_segments = [{
+            "keyframes": [{
+                "frame":
+                    annot.frame,
+                "bbox":
+                    Bbox(top=annot.value.start.y,
+                         left=annot.value.start.x,
+                         height=annot.value.end.y - annot.value.start.y,
+                         width=annot.value.end.x - annot.value.start.x)
+            } for annot in segment]
+        } for segment in segments]
+
+        return cls(segments=nd_segments,
+                   dataRow=DataRow(id=data.uid),
+                   schema_id=feature_schema_id,
+                   uuid=extra.get('uuid'))
+
+
 class _URIMask(BaseModel):
     instanceURI: str
     colorRGB: Tuple[int, int, int]
@@ -211,6 +280,21 @@ def from_common(
         cls, annotation: ObjectAnnotation, data: Union[ImageData, TextData]
     ) -> Union[NDLine, NDPoint, NDPolygon, NDRectangle, NDMask, NDTextEntity]:
         obj = cls.lookup_object(annotation)
+
+        #if it is video segments
+        if (obj == NDSegments):
+            print("hello i am ndsegment")
+            #look into segment of segments
+            #look into annotations of segment
+            #check and validate that there are no subclasses
+            #new method for processing segments ?
+
+            return obj.from_common(
+                annotation,
+                data,
+                feature_schema_id=annotation[0][0].feature_schema_id,
+                extra=annotation[0][0].extra)
+
         subclasses = [
             NDSubclassification.from_common(annot)
             for annot in annotation.classifications
@@ -221,20 +305,32 @@ def from_common(
 
     @staticmethod
     def lookup_object(annotation: ObjectAnnotation) -> "NDObjectType":
-        result = {
-            Line: NDLine,
-            Point: NDPoint,
-            Polygon: NDPolygon,
-            Rectangle: NDRectangle,
-            Mask: NDMask,
-            TextEntity: NDTextEntity
-        }.get(type(annotation.value))
+        # Lists are grouped video segments; other inputs dispatch on ``value``.
+        if isinstance(annotation, list):
+            return NDSegments
+        result = {
+            Line: NDLine,
+            Point: NDPoint,
+            Polygon: NDPolygon,
+            Rectangle: NDRectangle,
+            Mask: NDMask,
+            TextEntity: NDTextEntity
+        }.get(type(annotation.value))
         if result is None:
             raise TypeError(
                 f"Unable to convert object to MAL format. `{type(annotation.value)}`"
             )
         return result
 
+    # @staticmethod
+    # def process_segments(segments: List[List[VideoObjectAnnotation]]):
+    #     for segment in segments:
+    #         for annotation in segment:
+    #             subclasses = [
+    #                 NDSubclassification.from_common(annot)
+    #                 for annot in annotation.classifications
+    #             ]
+
 
 NDObjectType = Union[NDLine, NDPolygon, NDPoint, NDRectangle, NDMask,
                      NDTextEntity]