Skip to content

Commit 16896fd

Browse files
chore: add support for temporal text/radio/checklist classifications
1 parent ff298d4 commit 16896fd

File tree

4 files changed

+60
-69
lines changed

4 files changed

+60
-69
lines changed

examples/annotation_import/audio_temporal.ipynb

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,11 @@
4949
"\n",
5050
"## Key Features\n",
5151
"\n",
52-
"- **Time-based API**: Use seconds for user-friendly input\n",
53-
"- **Frame-based storage**: Internally uses milliseconds (1 frame = 1ms)\n",
52+
"- **Millisecond-based API**: Direct millisecond input for precise timing control\n",
53+
"- **Video-compatible structure**: Matches video temporal annotation pattern exactly\n",
54+
"- **Keyframe serialization**: Proper NDJSON structure for frontend timeline display\n",
5455
"- **MAL compatible**: Works with existing Model-Assisted Labeling pipeline\n",
55-
"- **UI compatible**: Uses existing video timeline components\n",
56+
"- **UI compatible**: Uses existing video timeline components seamlessly\n",
5657
"\n",
5758
"## Import Methods\n",
5859
"\n",

libs/labelbox/src/labelbox/data/annotation_types/audio.py

Lines changed: 4 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -17,42 +17,14 @@ class AudioClassificationAnnotation(ClassificationAnnotation):
1717
feature_schema_id (Optional[Cuid]): Feature schema identifier
1818
value (Union[Text, Checklist, Radio]): Classification value
1919
frame (int): The frame index in milliseconds (e.g., 2500 = 2.5 seconds)
20+
end_frame (Optional[int]): End frame in milliseconds (for time ranges)
2021
segment_index (Optional[int]): Index of audio segment this annotation belongs to
2122
extra (Dict[str, Any]): Additional metadata
2223
"""
2324

2425
frame: int
26+
end_frame: Optional[int] = None
2527
segment_index: Optional[int] = None
26-
27-
@classmethod
28-
def from_time_range(cls, start_ms: int, end_ms: int, **kwargs):
29-
"""Create from milliseconds (user-friendly) to frames (internal)
30-
31-
Args:
32-
start_ms (int): Start time in milliseconds
33-
end_ms (int): End time in milliseconds
34-
**kwargs: Additional arguments for the annotation
35-
36-
Returns:
37-
AudioClassificationAnnotation: Annotation with frame set to start_ms
38-
39-
Example:
40-
>>> AudioClassificationAnnotation.from_time_range(
41-
... start_ms=2500, end_ms=4100,
42-
... name="speaker_id",
43-
... value=lb_types.Radio(answer=lb_types.ClassificationAnswer(name="john"))
44-
... )
45-
"""
46-
return cls(frame=start_ms, **kwargs)
47-
48-
@property
49-
def start_time(self) -> float:
50-
"""Convert frame to seconds for user-facing APIs
51-
52-
Returns:
53-
float: Time in seconds (e.g., 2500 -> 2.5)
54-
"""
55-
return self.frame / 1000.0
5628

5729

5830
class AudioObjectAnnotation(ObjectAnnotation, ConfidenceNotSupportedMixin, CustomMetricsNotSupportedMixin):
@@ -68,42 +40,14 @@ class AudioObjectAnnotation(ObjectAnnotation, ConfidenceNotSupportedMixin, Custo
6840
feature_schema_id (Optional[Cuid]): Feature schema identifier
6941
value (Union[TextEntity, Geometry]): Localization or text content
7042
frame (int): The frame index in milliseconds (e.g., 10000 = 10.0 seconds)
43+
end_frame (Optional[int]): End frame in milliseconds (for time ranges)
7144
keyframe (bool): Whether this is a keyframe annotation (default: True)
7245
segment_index (Optional[int]): Index of audio segment this annotation belongs to
7346
classifications (Optional[List[ClassificationAnnotation]]): Optional sub-classifications
7447
extra (Dict[str, Any]): Additional metadata
7548
"""
7649

7750
frame: int
51+
end_frame: Optional[int] = None
7852
keyframe: bool = True
7953
segment_index: Optional[int] = None
80-
81-
@classmethod
82-
def from_time_range(cls, start_ms: int, end_ms: int, **kwargs):
83-
"""Create from milliseconds (user-friendly) to frames (internal)
84-
85-
Args:
86-
start_ms (int): Start time in milliseconds
87-
end_ms (int): End time in milliseconds
88-
**kwargs: Additional arguments for the annotation
89-
90-
Returns:
91-
AudioObjectAnnotation: Annotation with frame set to start_ms
92-
93-
Example:
94-
>>> AudioObjectAnnotation.from_time_range(
95-
... start_ms=10000, end_ms=12500,
96-
... name="transcription",
97-
... value=lb_types.TextEntity(text="Hello world")
98-
... )
99-
"""
100-
return cls(frame=start_ms, **kwargs)
101-
102-
@property
103-
def start_time(self) -> float:
104-
"""Convert frame to seconds for user-facing APIs
105-
106-
Returns:
107-
float: Time in seconds (e.g., 10000 -> 10.0)
108-
"""
109-
return self.frame / 1000.0

libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ def from_common(
224224
# ====== End of subclasses
225225

226226

227-
class NDText(NDAnnotation, NDTextSubclass):
227+
class NDText(NDAnnotation, NDTextSubclass, VideoSupported):
228228
@classmethod
229229
def from_common(
230230
cls,
@@ -243,6 +243,7 @@ def from_common(
243243
name=name,
244244
schema_id=feature_schema_id,
245245
uuid=uuid,
246+
frames=extra.get("frames"),
246247
message_id=message_id,
247248
confidence=text.confidence,
248249
custom_metrics=text.custom_metrics,

libs/labelbox/src/labelbox/data/serialization/ndjson/label.py

Lines changed: 50 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -186,12 +186,57 @@ def _create_audio_annotations(
186186
)
187187

188188
for annotation_group in audio_annotations.values():
189-
# For audio, treat each annotation as a single frame (no segments needed)
190189
if isinstance(annotation_group[0], AudioClassificationAnnotation):
191-
annotation = annotation_group[0]
192-
# Add frame information to extra (milliseconds)
193-
annotation.extra.update({"frame": annotation.frame})
194-
yield NDClassification.from_common(annotation, label.data)
190+
# For TEXT classifications, group them into one feature with multiple keyframes
191+
from ...annotation_types.classification.classification import Text
192+
if isinstance(annotation_group[0].value, Text):
193+
194+
# Group all annotations into one feature with multiple keyframes
195+
# Use first annotation as template but create combined content
196+
annotation = annotation_group[0]
197+
frames_data = []
198+
all_tokens = []
199+
200+
for individual_annotation in annotation_group:
201+
frame = individual_annotation.frame
202+
end_frame = individual_annotation.end_frame if hasattr(individual_annotation, 'end_frame') and individual_annotation.end_frame is not None else frame
203+
frames_data.append({"start": frame, "end": end_frame})
204+
all_tokens.append(individual_annotation.value.answer)
205+
206+
# For per-token annotations, embed token mapping in the content
207+
# Create a JSON structure that includes both the default text and token mapping
208+
import json
209+
token_mapping = {}
210+
for individual_annotation in annotation_group:
211+
frame = individual_annotation.frame
212+
token_mapping[str(frame)] = individual_annotation.value.answer
213+
214+
# Embed token mapping in the answer field as JSON
215+
content_with_mapping = {
216+
"default_text": " ".join(all_tokens), # Fallback text
217+
"token_mapping": token_mapping # Per-keyframe content
218+
}
219+
from ...annotation_types.classification.classification import Text
220+
annotation.value = Text(answer=json.dumps(content_with_mapping))
221+
222+
# Update the annotation with frames data
223+
annotation.extra = {"frames": frames_data}
224+
yield NDClassification.from_common(annotation, label.data)
225+
else:
226+
# For non-TEXT classifications, process each individually
227+
for annotation in annotation_group:
228+
229+
# Ensure frame data is properly formatted in extra field
230+
if hasattr(annotation, 'frame') and annotation.frame is not None:
231+
if not annotation.extra:
232+
annotation.extra = {}
233+
234+
if 'frames' not in annotation.extra:
235+
end_frame = annotation.end_frame if hasattr(annotation, 'end_frame') and annotation.end_frame is not None else annotation.frame
236+
frames_data = [{"start": annotation.frame, "end": end_frame}]
237+
annotation.extra.update({"frames": frames_data})
238+
239+
yield NDClassification.from_common(annotation, label.data)
195240

196241
elif isinstance(annotation_group[0], AudioObjectAnnotation):
197242
# For audio objects, treat like single video frame

0 commit comments

Comments (0)