Skip to content

Commit 471c618

Browse files
chore: new interface
1 parent 66e4c44 commit 471c618

File tree

7 files changed

+433
-402
lines changed

7 files changed

+433
-402
lines changed

examples/annotation_import/audio.ipynb

Lines changed: 323 additions & 327 deletions
Large diffs are not rendered by default.

libs/labelbox/src/labelbox/data/annotation_types/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from .classification import ClassificationAnswer
3131
from .classification import Radio
3232
from .classification import Text
33+
from .classification import FrameLocation
3334

3435
from .data import GenericDataRowData
3536
from .data import MaskData
Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
1-
from typing import Optional
1+
from typing import Optional, List
22
from pydantic import Field, AliasChoices
33

44
from labelbox.data.annotation_types.annotation import (
55
ClassificationAnnotation,
66
)
7+
from labelbox.data.annotation_types.classification.classification import FrameLocation
78

89

910
class AudioClassificationAnnotation(ClassificationAnnotation):
10-
"""Audio classification for specific time range
11+
"""Audio classification for specific time range(s)
1112
1213
Examples:
1314
- Speaker identification from 2500ms to 4100ms
@@ -18,19 +19,10 @@ class AudioClassificationAnnotation(ClassificationAnnotation):
1819
name (Optional[str]): Name of the classification
1920
feature_schema_id (Optional[Cuid]): Feature schema identifier
2021
value (Union[Text, Checklist, Radio]): Classification value
21-
start_frame (int): The frame index in milliseconds (e.g., 2500 = 2.5 seconds)
22-
end_frame (Optional[int]): End frame in milliseconds (for time ranges)
22+
frames (Optional[List[FrameLocation]]): List of frame ranges (in milliseconds)
2323
segment_index (Optional[int]): Index of audio segment this annotation belongs to
2424
extra (Dict[str, Any]): Additional metadata
2525
"""
2626

27-
start_frame: int = Field(
28-
validation_alias=AliasChoices("start_frame", "frame"),
29-
serialization_alias="start_frame",
30-
)
31-
end_frame: Optional[int] = Field(
32-
default=None,
33-
validation_alias=AliasChoices("end_frame", "endFrame"),
34-
serialization_alias="end_frame",
35-
)
27+
frames: Optional[List[FrameLocation]] = None
3628
segment_index: Optional[int] = None
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
from .classification import Checklist, ClassificationAnswer, Radio, Text
1+
from .classification import Checklist, ClassificationAnswer, Radio, Text, FrameLocation

libs/labelbox/src/labelbox/data/annotation_types/classification/classification.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@
77
from ..feature import FeatureSchema
88

99

10+
class FrameLocation(BaseModel):
11+
"""Represents a temporal frame range with start and end times (in milliseconds)."""
12+
start: int
13+
end: int
14+
15+
1016
class ClassificationAnswer(FeatureSchema, ConfidenceMixin, CustomMetricsMixin):
1117
"""
1218
- Represents a classification option.
@@ -18,14 +24,17 @@ class ClassificationAnswer(FeatureSchema, ConfidenceMixin, CustomMetricsMixin):
1824
So unlike object annotations, classification annotations
1925
track keyframes at a classification answer level.
2026
21-
- For temporal classifications (audio/video), optional start_frame/end_frame can specify
22-
the time range for this answer. Must be within root annotation's frame range.
23-
Defaults to root frame range if not specified.
27+
- For temporal classifications (audio/video), optional frames can specify
28+
one or more time ranges for this answer. Must be within root annotation's frame ranges.
29+
Defaults to root frame ranges if not specified.
2430
"""
2531

2632
extra: Dict[str, Any] = {}
2733
keyframe: Optional[bool] = None
2834
classifications: Optional[List["ClassificationAnnotation"]] = None
35+
frames: Optional[List[FrameLocation]] = None
36+
37+
# Deprecated: use frames instead
2938
start_frame: Optional[int] = None
3039
end_frame: Optional[int] = None
3140

@@ -75,12 +84,14 @@ class ClassificationAnnotation(
7584
classifications (Optional[List[ClassificationAnnotation]]): Optional sub classification of the annotation
7685
feature_schema_id (Optional[Cuid])
7786
value (Union[Text, Checklist, Radio])
78-
start_frame (Optional[int]): Start frame for temporal classifications (audio/video). Must be within root annotation's frame range. Defaults to root start_frame if not specified.
79-
end_frame (Optional[int]): End frame for temporal classifications (audio/video). Must be within root annotation's frame range. Defaults to root end_frame if not specified.
87+
frames (Optional[List[FrameLocation]]): Frame ranges for temporal classifications (audio/video). Must be within root annotation's frame ranges. Defaults to root frames if not specified.
8088
extra (Dict[str, Any])
8189
"""
8290

8391
value: Union[Text, Checklist, Radio]
8492
message_id: Optional[str] = None
93+
frames: Optional[List[FrameLocation]] = None
94+
95+
# Deprecated: use frames instead
8596
start_frame: Optional[int] = None
8697
end_frame: Optional[int] = None

libs/labelbox/src/labelbox/data/serialization/ndjson/temporal.py

Lines changed: 62 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,16 @@ def _process_radio(
162162
):
163163
all_nested.extend(ann.value.answer.classifications)
164164

165-
entry = {"name": opt_name, "frames": all_frames}
165+
# Deduplicate frames
166+
seen = set()
167+
unique_frames = []
168+
for frame in all_frames:
169+
frame_tuple = (frame["start"], frame["end"])
170+
if frame_tuple not in seen:
171+
seen.add(frame_tuple)
172+
unique_frames.append(frame)
173+
174+
entry = {"name": opt_name, "frames": unique_frames}
166175

167176
# Recursively process nested
168177
if all_nested:
@@ -196,7 +205,16 @@ def _process_text(
196205
if hasattr(ann, "classifications") and ann.classifications:
197206
all_nested.extend(ann.classifications)
198207

199-
entry = {"value": text_value, "frames": all_frames}
208+
# Deduplicate frames
209+
seen = set()
210+
unique_frames = []
211+
for frame in all_frames:
212+
frame_tuple = (frame["start"], frame["end"])
213+
if frame_tuple not in seen:
214+
seen.add(frame_tuple)
215+
unique_frames.append(frame)
216+
217+
entry = {"value": text_value, "frames": unique_frames}
200218

201219
# Recursively process nested
202220
if all_nested:
@@ -304,7 +322,16 @@ def _process_nested_radio(classifications: List[Any]) -> Dict[str, Any]:
304322
):
305323
all_nested.extend(cls.value.answer.classifications)
306324

307-
entry = {"name": opt_name, "frames": all_frames}
325+
# Deduplicate frames
326+
seen = set()
327+
unique_frames = []
328+
for frame in all_frames:
329+
frame_tuple = (frame["start"], frame["end"])
330+
if frame_tuple not in seen:
331+
seen.add(frame_tuple)
332+
unique_frames.append(frame)
333+
334+
entry = {"name": opt_name, "frames": unique_frames}
308335

309336
if all_nested:
310337
nested = _process_nested_classifications(all_nested)
@@ -333,7 +360,16 @@ def _process_nested_text(classifications: List[Any]) -> Dict[str, Any]:
333360
if hasattr(cls, "classifications") and cls.classifications:
334361
all_nested.extend(cls.classifications)
335362

336-
entry = {"value": text_value, "frames": all_frames}
363+
# Deduplicate frames
364+
seen = set()
365+
unique_frames = []
366+
for frame in all_frames:
367+
frame_tuple = (frame["start"], frame["end"])
368+
if frame_tuple not in seen:
369+
seen.add(frame_tuple)
370+
unique_frames.append(frame)
371+
372+
entry = {"value": text_value, "frames": unique_frames}
337373

338374
if all_nested:
339375
nested = _process_nested_classifications(all_nested)
@@ -347,18 +383,30 @@ def _extract_frames(
347383
obj: Any, fallback_frames: List[Dict[str, int]]
348384
) -> List[Dict[str, int]]:
349385
"""
350-
Extract frame range from an object (annotation, answer, or classification).
386+
Extract frame ranges from an object (annotation, answer, or classification).
351387
Uses explicit frames if available, otherwise falls back to provided frames.
388+
389+
Supports both:
390+
- New format: frames: List[FrameLocation]
391+
- Legacy format: start_frame/end_frame (single range)
352392
"""
353-
if (
393+
# New format: frames list
394+
if hasattr(obj, "frames") and obj.frames is not None:
395+
return [{"start": frame.start, "end": frame.end} for frame in obj.frames]
396+
397+
# Legacy format: single start_frame/end_frame
398+
elif (
354399
hasattr(obj, "start_frame")
355400
and obj.start_frame is not None
356401
and hasattr(obj, "end_frame")
357402
and obj.end_frame is not None
358403
):
359404
return [{"start": obj.start_frame, "end": obj.end_frame}]
405+
406+
# Fallback to parent frames
360407
elif fallback_frames:
361408
return fallback_frames
409+
362410
else:
363411
return []
364412

@@ -406,6 +454,14 @@ def create_audio_ndjson_annotations(
406454
def audio_frame_extractor(
407455
ann: AudioClassificationAnnotation,
408456
) -> Tuple[int, int]:
457+
"""
458+
Legacy frame extractor for AudioClassificationAnnotation.
459+
Only used when frames list is not provided.
460+
"""
461+
# Return first frame if frames list exists
462+
if ann.frames and len(ann.frames) > 0:
463+
return (ann.frames[0].start, ann.frames[0].end)
464+
# Fall back to legacy start_frame/end_frame
409465
return (ann.start_frame, ann.end_frame or ann.start_frame)
410466

411467
return create_temporal_ndjson_annotations(

0 commit comments

Comments
 (0)