Skip to content

Commit 471c618

Browse files
chore: new interface
1 parent 66e4c44 commit 471c618

File tree

7 files changed

+433
-402
lines changed

7 files changed

+433
-402
lines changed

examples/annotation_import/audio.ipynb

Lines changed: 323 additions & 327 deletions
Large diffs are not rendered by default.

libs/labelbox/src/labelbox/data/annotation_types/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from .classification import ClassificationAnswer
3131
from .classification import Radio
3232
from .classification import Text
33+
from .classification import FrameLocation
3334

3435
from .data import GenericDataRowData
3536
from .data import MaskData
Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
1-
from typing import Optional
1+
from typing import Optional, List
22
from pydantic import Field, AliasChoices
33

44
from labelbox.data.annotation_types.annotation import (
55
ClassificationAnnotation,
66
)
7+
from labelbox.data.annotation_types.classification.classification import FrameLocation
78

89

910
class AudioClassificationAnnotation(ClassificationAnnotation):
10-
"""Audio classification for specific time range
11+
"""Audio classification for specific time range(s)
1112
1213
Examples:
1314
- Speaker identification from 2500ms to 4100ms
@@ -18,19 +19,10 @@ class AudioClassificationAnnotation(ClassificationAnnotation):
1819
name (Optional[str]): Name of the classification
1920
feature_schema_id (Optional[Cuid]): Feature schema identifier
2021
value (Union[Text, Checklist, Radio]): Classification value
21-
start_frame (int): The frame index in milliseconds (e.g., 2500 = 2.5 seconds)
22-
end_frame (Optional[int]): End frame in milliseconds (for time ranges)
22+
frames (Optional[List[FrameLocation]]): List of frame ranges (in milliseconds)
2323
segment_index (Optional[int]): Index of audio segment this annotation belongs to
2424
extra (Dict[str, Any]): Additional metadata
2525
"""
2626

27-
start_frame: int = Field(
28-
validation_alias=AliasChoices("start_frame", "frame"),
29-
serialization_alias="start_frame",
30-
)
31-
end_frame: Optional[int] = Field(
32-
default=None,
33-
validation_alias=AliasChoices("end_frame", "endFrame"),
34-
serialization_alias="end_frame",
35-
)
27+
frames: Optional[List[FrameLocation]] = None
3628
segment_index: Optional[int] = None
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
from .classification import Checklist, ClassificationAnswer, Radio, Text
1+
from .classification import Checklist, ClassificationAnswer, Radio, Text, FrameLocation

libs/labelbox/src/labelbox/data/annotation_types/classification/classification.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@
77
from ..feature import FeatureSchema
88

99

10+
class FrameLocation(BaseModel):
11+
"""Represents a temporal frame range with start and end times (in milliseconds)."""
12+
start: int
13+
end: int
14+
15+
1016
class ClassificationAnswer(FeatureSchema, ConfidenceMixin, CustomMetricsMixin):
1117
"""
1218
- Represents a classification option.
@@ -18,14 +24,17 @@ class ClassificationAnswer(FeatureSchema, ConfidenceMixin, CustomMetricsMixin):
1824
So unlike object annotations, classification annotations
1925
track keyframes at a classification answer level.
2026
21-
- For temporal classifications (audio/video), optional start_frame/end_frame can specify
22-
the time range for this answer. Must be within root annotation's frame range.
23-
Defaults to root frame range if not specified.
27+
- For temporal classifications (audio/video), optional frames can specify
28+
one or more time ranges for this answer. Must be within root annotation's frame ranges.
29+
Defaults to root frame ranges if not specified.
2430
"""
2531

2632
extra: Dict[str, Any] = {}
2733
keyframe: Optional[bool] = None
2834
classifications: Optional[List["ClassificationAnnotation"]] = None
35+
frames: Optional[List[FrameLocation]] = None
36+
37+
# Deprecated: use frames instead
2938
start_frame: Optional[int] = None
3039
end_frame: Optional[int] = None
3140

@@ -75,12 +84,14 @@ class ClassificationAnnotation(
7584
classifications (Optional[List[ClassificationAnnotation]]): Optional sub classification of the annotation
7685
feature_schema_id (Optional[Cuid])
7786
value (Union[Text, Checklist, Radio])
78-
start_frame (Optional[int]): Start frame for temporal classifications (audio/video). Must be within root annotation's frame range. Defaults to root start_frame if not specified.
79-
end_frame (Optional[int]): End frame for temporal classifications (audio/video). Must be within root annotation's frame range. Defaults to root end_frame if not specified.
87+
frames (Optional[List[FrameLocation]]): Frame ranges for temporal classifications (audio/video). Must be within root annotation's frame ranges. Defaults to root frames if not specified.
8088
extra (Dict[str, Any])
8189
"""
8290

8391
value: Union[Text, Checklist, Radio]
8492
message_id: Optional[str] = None
93+
frames: Optional[List[FrameLocation]] = None
94+
95+
# Deprecated: use frames instead
8596
start_frame: Optional[int] = None
8697
end_frame: Optional[int] = None

libs/labelbox/src/labelbox/data/serialization/ndjson/temporal.py

Lines changed: 62 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,16 @@ def _process_radio(
162162
):
163163
all_nested.extend(ann.value.answer.classifications)
164164

165-
entry = {"name": opt_name, "frames": all_frames}
165+
# Deduplicate frames
166+
seen = set()
167+
unique_frames = []
168+
for frame in all_frames:
169+
frame_tuple = (frame["start"], frame["end"])
170+
if frame_tuple not in seen:
171+
seen.add(frame_tuple)
172+
unique_frames.append(frame)
173+
174+
entry = {"name": opt_name, "frames": unique_frames}
166175

167176
# Recursively process nested
168177
if all_nested:
@@ -196,7 +205,16 @@ def _process_text(
196205
if hasattr(ann, "classifications") and ann.classifications:
197206
all_nested.extend(ann.classifications)
198207

199-
entry = {"value": text_value, "frames": all_frames}
208+
# Deduplicate frames
209+
seen = set()
210+
unique_frames = []
211+
for frame in all_frames:
212+
frame_tuple = (frame["start"], frame["end"])
213+
if frame_tuple not in seen:
214+
seen.add(frame_tuple)
215+
unique_frames.append(frame)
216+
217+
entry = {"value": text_value, "frames": unique_frames}
200218

201219
# Recursively process nested
202220
if all_nested:
@@ -304,7 +322,16 @@ def _process_nested_radio(classifications: List[Any]) -> Dict[str, Any]:
304322
):
305323
all_nested.extend(cls.value.answer.classifications)
306324

307-
entry = {"name": opt_name, "frames": all_frames}
325+
# Deduplicate frames
326+
seen = set()
327+
unique_frames = []
328+
for frame in all_frames:
329+
frame_tuple = (frame["start"], frame["end"])
330+
if frame_tuple not in seen:
331+
seen.add(frame_tuple)
332+
unique_frames.append(frame)
333+
334+
entry = {"name": opt_name, "frames": unique_frames}
308335

309336
if all_nested:
310337
nested = _process_nested_classifications(all_nested)
@@ -333,7 +360,16 @@ def _process_nested_text(classifications: List[Any]) -> Dict[str, Any]:
333360
if hasattr(cls, "classifications") and cls.classifications:
334361
all_nested.extend(cls.classifications)
335362

336-
entry = {"value": text_value, "frames": all_frames}
363+
# Deduplicate frames
364+
seen = set()
365+
unique_frames = []
366+
for frame in all_frames:
367+
frame_tuple = (frame["start"], frame["end"])
368+
if frame_tuple not in seen:
369+
seen.add(frame_tuple)
370+
unique_frames.append(frame)
371+
372+
entry = {"value": text_value, "frames": unique_frames}
337373

338374
if all_nested:
339375
nested = _process_nested_classifications(all_nested)
@@ -347,18 +383,30 @@ def _extract_frames(
347383
obj: Any, fallback_frames: List[Dict[str, int]]
348384
) -> List[Dict[str, int]]:
349385
"""
350-
Extract frame range from an object (annotation, answer, or classification).
386+
Extract frame ranges from an object (annotation, answer, or classification).
351387
Uses explicit frames if available, otherwise falls back to provided frames.
388+
389+
Supports both:
390+
- New format: frames: List[FrameLocation]
391+
- Legacy format: start_frame/end_frame (single range)
352392
"""
353-
if (
393+
# New format: frames list
394+
if hasattr(obj, "frames") and obj.frames is not None:
395+
return [{"start": frame.start, "end": frame.end} for frame in obj.frames]
396+
397+
# Legacy format: single start_frame/end_frame
398+
elif (
354399
hasattr(obj, "start_frame")
355400
and obj.start_frame is not None
356401
and hasattr(obj, "end_frame")
357402
and obj.end_frame is not None
358403
):
359404
return [{"start": obj.start_frame, "end": obj.end_frame}]
405+
406+
# Fallback to parent frames
360407
elif fallback_frames:
361408
return fallback_frames
409+
362410
else:
363411
return []
364412

@@ -406,6 +454,14 @@ def create_audio_ndjson_annotations(
406454
def audio_frame_extractor(
407455
ann: AudioClassificationAnnotation,
408456
) -> Tuple[int, int]:
457+
"""
458+
Legacy frame extractor for AudioClassificationAnnotation.
459+
Only used when frames list is not provided.
460+
"""
461+
# Return first frame if frames list exists
462+
if ann.frames and len(ann.frames) > 0:
463+
return (ann.frames[0].start, ann.frames[0].end)
464+
# Fall back to legacy start_frame/end_frame
409465
return (ann.start_frame, ann.end_frame or ann.start_frame)
410466

411467
return create_temporal_ndjson_annotations(

0 commit comments

Comments
 (0)