
Commit 478fb23

chore: final nail; interface is simple and works with frame arg
1 parent 471c618 commit 478fb23

3 files changed: 14 additions & 12 deletions

examples/annotation_import/audio.ipynb

Lines changed: 2 additions & 2 deletions
@@ -259,14 +259,14 @@
 },
 {
 "metadata": {},
-"source": "# Define tokens with precise timing (from demo script)\ntokens_data = [\n (\"Hello\", 586, 770), # Hello: frames 586-770\n (\"AI\", 771, 955), # AI: frames 771-955\n (\"how\", 956, 1140), # how: frames 956-1140\n (\"are\", 1141, 1325), # are: frames 1141-1325\n (\"you\", 1326, 1510), # you: frames 1326-1510\n (\"doing\", 1511, 1695), # doing: frames 1511-1695\n (\"today\", 1696, 1880), # today: frames 1696-1880\n]\n\n# Create temporal annotations for each token using NEW frames interface\ntemporal_annotations = []\nfor token, start_frame, end_frame in tokens_data:\n token_annotation = lb_types.AudioClassificationAnnotation(\n frames=[lb_types.FrameLocation(start=start_frame, end=end_frame)],\n name=\"User Speaker\",\n value=lb_types.Text(answer=token),\n )\n temporal_annotations.append(token_annotation)\n\nprint(f\"Created {len(temporal_annotations)} temporal token annotations\")",
+"source": "# Define tokens with precise timing\ntokens_data = [\n (\"Hello\", 586, 770), # Hello: frames 586-770\n (\"AI\", 771, 955), # AI: frames 771-955\n (\"how\", 956, 1140), # how: frames 956-1140\n (\"are\", 1141, 1325), # are: frames 1141-1325\n (\"you\", 1326, 1510), # you: frames 1326-1510\n (\"doing\", 1511, 1695), # doing: frames 1511-1695\n (\"today\", 1696, 1880), # today: frames 1696-1880\n]\n\n# Create temporal annotations for each token\ntemporal_annotations = []\nfor token, start_frame, end_frame in tokens_data:\n token_annotation = lb_types.AudioClassificationAnnotation(\n start_frame=start_frame,\n end_frame=end_frame,\n name=\"User Speaker\",\n value=lb_types.Text(answer=token),\n )\n temporal_annotations.append(token_annotation)\n\nprint(f\"Created {len(temporal_annotations)} temporal token annotations\")",
 "cell_type": "code",
 "outputs": [],
 "execution_count": null
 },
 {
 "metadata": {},
-"source": "# Create label with regular and temporal annotations\nlabel_with_temporal = []\nlabel_with_temporal.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[text_annotation, checklist_annotation, radio_annotation] +\n temporal_annotations,\n ))\n\nprint(\n f\"Created label with {len(label_with_temporal[0].annotations)} total annotations\"\n)\nprint(f\" - Regular annotations: 3\")\nprint(f\" - Temporal annotations: {len(temporal_annotations)}\")\n\n# Example: Nested temporal annotation with explicit frame matching\n# Structure: Speaker -> Transcription -> Emotion -> Intensity\n# Each level can have different frame ranges (subsets of parent)\nnested_temporal_annotation = lb_types.AudioClassificationAnnotation(\n frames=[lb_types.FrameLocation(start=100, end=500)],\n name=\"Speaker Analysis\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"User\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"Transcription\",\n value=lb_types.Text(answer=\"Hello there\"),\n frames=[lb_types.FrameLocation(start=100, end=500)],\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"Emotion\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"happy\",\n frames=[lb_types.FrameLocation(start=150, end=450)],\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"Intensity\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"high\",\n frames=[lb_types.FrameLocation(start=200, end=400)]\n )\n )\n )\n ]\n )\n ),\n frames=[lb_types.FrameLocation(start=150, end=450)]\n )\n ]\n )\n ]\n )\n )\n)\n\nprint(\"\\nNested temporal annotation created:\")\nprint(\" - Speaker: 100-500ms\")\nprint(\" → Transcription: 100-500ms\")\nprint(\" → Emotion: 150-450ms (subset)\")\nprint(\" → Intensity: 200-400ms (subset)\")\n",
+"source": "# Create label with regular and temporal annotations\nlabel_with_temporal = []\nlabel_with_temporal.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[text_annotation, checklist_annotation, radio_annotation] +\n temporal_annotations,\n ))\n\nprint(\n f\"Created label with {len(label_with_temporal[0].annotations)} total annotations\"\n)\nprint(f\" - Regular annotations: 3\")\nprint(f\" - Temporal annotations: {len(temporal_annotations)}\")\n\n# Example: Nested temporal annotation with hierarchical classifications\n# Structure: Speaker -> Transcription -> Emotion -> Intensity\n# Parent uses start_frame/end_frame, nested items use frames for discontinuous ranges\nnested_temporal_annotation = lb_types.AudioClassificationAnnotation(\n start_frame=100,\n end_frame=500,\n name=\"Speaker Analysis\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"User\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"Transcription\",\n value=lb_types.Text(answer=\"Hello there\"),\n frames=[lb_types.FrameLocation(start=100, end=500)],\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"Emotion\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"happy\",\n frames=[lb_types.FrameLocation(start=150, end=450)],\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"Intensity\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"high\",\n frames=[lb_types.FrameLocation(start=200, end=400)]\n )\n )\n )\n ]\n )\n ),\n frames=[lb_types.FrameLocation(start=150, end=450)]\n )\n ]\n )\n ]\n )\n )\n)\n\nprint(\"\\nNested temporal annotation created:\")\nprint(\" - Speaker: 100-500ms (parent range)\")\nprint(\" → Transcription: 100-500ms\")\nprint(\" → Emotion: 150-450ms (nested subset)\")\nprint(\" → Intensity: 200-400ms (nested subset)\")\n",
 "cell_type": "code",
 "outputs": [],
 "execution_count": null

libs/labelbox/src/labelbox/data/annotation_types/audio.py

Lines changed: 12 additions & 3 deletions
@@ -8,7 +8,7 @@
 
 
 class AudioClassificationAnnotation(ClassificationAnnotation):
-    """Audio classification for specific time range(s)
+    """Audio classification for specific time range
 
     Examples:
     - Speaker identification from 2500ms to 4100ms
@@ -19,10 +19,19 @@ class AudioClassificationAnnotation(ClassificationAnnotation):
         name (Optional[str]): Name of the classification
         feature_schema_id (Optional[Cuid]): Feature schema identifier
         value (Union[Text, Checklist, Radio]): Classification value
-        frames (Optional[List[FrameLocation]]): List of frame ranges (in milliseconds)
+        start_frame (Optional[int]): Start frame in milliseconds
+        end_frame (Optional[int]): End frame in milliseconds
         segment_index (Optional[int]): Index of audio segment this annotation belongs to
         extra (Dict[str, Any]): Additional metadata
+
+    Note:
+        Parent AudioClassificationAnnotation uses start_frame/end_frame (single range).
+        Nested classifications/answers use frames: List[FrameLocation] for discontinuous ranges.
+        Multiple time ranges for same classification = multiple separate annotation objects.
     """
 
-    frames: Optional[List[FrameLocation]] = None
+    start_frame: Optional[int] = Field(
+        default=None, validation_alias=AliasChoices("start_frame", "frame")
+    )
+    end_frame: Optional[int] = None
     segment_index: Optional[int] = None
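
The docstring Note above has two practical consequences worth spelling out: a parent annotation carries exactly one contiguous range, so discontinuous occurrences of the same classification become separate objects, and the AliasChoices("start_frame", "frame") validation alias lets callers spell the keyword as frame (the "works with frame arg" part of the commit message). A hedged sketch with illustrative values, not taken from the repo's tests:

import labelbox.types as lb_types

# Two occurrences of the same classification at different times are expressed
# as two separate parent annotations rather than one annotation with a list of ranges.
first_mention = lb_types.AudioClassificationAnnotation(
    start_frame=1000,
    end_frame=2000,
    name="User Speaker",
    value=lb_types.Text(answer="Hello"),
)
second_mention = lb_types.AudioClassificationAnnotation(
    start_frame=5000,
    end_frame=5400,
    name="User Speaker",
    value=lb_types.Text(answer="again"),
)

# Because of the validation alias, "frame" is accepted as a synonym for "start_frame".
aliased = lb_types.AudioClassificationAnnotation(
    frame=1000,
    end_frame=2000,
    name="User Speaker",
    value=lb_types.Text(answer="Hello"),
)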

libs/labelbox/src/labelbox/data/annotation_types/classification/classification.py

Lines changed: 0 additions & 7 deletions
@@ -34,10 +34,6 @@ class ClassificationAnswer(FeatureSchema, ConfidenceMixin, CustomMetricsMixin):
     classifications: Optional[List["ClassificationAnnotation"]] = None
     frames: Optional[List[FrameLocation]] = None
 
-    # Deprecated: use frames instead
-    start_frame: Optional[int] = None
-    end_frame: Optional[int] = None
-
 
 class Radio(ConfidenceMixin, CustomMetricsMixin, BaseModel):
     """A classification with only one selected option allowed
@@ -92,6 +88,3 @@ class ClassificationAnnotation(
     message_id: Optional[str] = None
     frames: Optional[List[FrameLocation]] = None
 
-    # Deprecated: use frames instead
-    start_frame: Optional[int] = None
-    end_frame: Optional[int] = None
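
With the deprecated fields removed, nested ClassificationAnnotation and ClassificationAnswer objects carry timing only through frames: List[FrameLocation], which is what allows a child to cover several discontinuous ranges inside its parent's start_frame/end_frame window. A small sketch using the same type names as the notebook example above (the split ranges are illustrative assumptions):

import labelbox.types as lb_types

# A nested child classification may list more than one FrameLocation,
# e.g. an emotion present in only two stretches of the parent range.
emotion = lb_types.ClassificationAnnotation(
    name="Emotion",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="happy",
            frames=[
                lb_types.FrameLocation(start=150, end=300),
                lb_types.FrameLocation(start=350, end=450),
            ],
        )
    ),
    frames=[lb_types.FrameLocation(start=150, end=450)],
)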
