diff --git a/examples/README.md b/examples/README.md index 924d1017d..842286b2d 100644 --- a/examples/README.md +++ b/examples/README.md @@ -16,15 +16,25 @@ + + Projects + Open In Github + Open In Colab + Ontologies Open In Github Open In Colab - Quick Start - Open In Github - Open In Colab + Batches + Open In Github + Open In Colab + + + Custom Embeddings + Open In Github + Open In Colab Data Rows @@ -37,25 +47,15 @@ Open In Colab - Batches - Open In Github - Open In Colab - - - Projects - Open In Github - Open In Colab + Quick Start + Open In Github + Open In Colab Data Row Metadata Open In Github Open In Colab - - Custom Embeddings - Open In Github - Open In Colab - User Management Open In Github @@ -75,25 +75,25 @@ + + Export Data + Open In Github + Open In Colab + Export V1 to V2 Migration Support Open In Github Open In Colab - - Exporting to CSV - Open In Github - Open In Colab - Composite Mask Export Open In Github Open In Colab - Export Data - Open In Github - Open In Colab + Exporting to CSV + Open In Github + Open In Colab @@ -143,36 +143,11 @@ - - Tiled - Open In Github - Open In Colab - Text Open In Github Open In Colab - - PDF - Open In Github - Open In Colab - - - Video - Open In Github - Open In Colab - - - Audio - Open In Github - Open In Colab - - - Conversational - Open In Github - Open In Colab - HTML Open In Github @@ -188,11 +163,36 @@ Open In Github Open In Colab + + Video + Open In Github + Open In Colab + + + Audio + Open In Github + Open In Colab + Conversational LLM Open In Github Open In Colab + + Tiled + Open In Github + Open In Colab + + + PDF + Open In Github + Open In Colab + + + Conversational + Open In Github + Open In Colab + @@ -208,9 +208,9 @@ - Langchain - Open In Github - Open In Colab + Meta SAM + Open In Github + Open In Colab Meta SAM Video @@ -218,20 +218,20 @@ Open In Colab - Meta SAM - Open In Github - Open In Colab + Huggingface Custom Embeddings + Open In Github + Open In Colab + + + Langchain + Open In Github + Open In Colab Import YOLOv8 Annotations Open In Github Open In Colab - - Huggingface Custom Embeddings - Open In Github - Open In Colab - @@ -247,25 +247,25 @@ - Model Predictions to Project - Open In Github - Open In Colab + Custom Metrics Basics + Open In Github + Open In Colab Custom Metrics Demo Open In Github Open In Colab - - Custom Metrics Basics - Open In Github - Open In Colab - Model Slices Open In Github Open In Colab + + Model Predictions to Project + Open In Github + Open In Colab + @@ -280,25 +280,15 @@ - - HTML Predictions - Open In Github - Open In Colab - Text Predictions Open In Github Open In Colab - Video Predictions - Open In Github - Open In Colab - - - Conversational Predictions - Open In Github - Open In Colab + PDF Predictions + Open In Github + Open In Colab Geospatial Predictions @@ -306,9 +296,14 @@ Open In Colab - PDF Predictions - Open In Github - Open In Colab + Conversational Predictions + Open In Github + Open In Colab + + + Video Predictions + Open In Github + Open In Colab Image Predictions @@ -320,6 +315,11 @@ Open In Github Open In Colab + + HTML Predictions + Open In Github + Open In Colab + diff --git a/examples/annotation_import/audio.ipynb b/examples/annotation_import/audio.ipynb index 437130a9e..0de1b193e 100644 --- a/examples/annotation_import/audio.ipynb +++ b/examples/annotation_import/audio.ipynb @@ -170,7 +170,7 @@ }, { "metadata": {}, - "source": "ontology_builder = lb.OntologyBuilder(classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"text_audio\"),\n 
lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_audio\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_audio\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n])\n\nontology = client.create_ontology(\n \"Ontology Audio Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Audio,\n)", + "source": "ontology_builder = lb.OntologyBuilder(classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"text_audio\"),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_audio\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_audio\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n # Temporal classification for token-level annotations\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n name=\"User Speaker\",\n scope=lb.Classification.Scope.INDEX, # INDEX scope for temporal\n ),\n])\n\nontology = client.create_ontology(\n \"Ontology Audio Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Audio,\n)", "cell_type": "code", "outputs": [], "execution_count": null @@ -252,6 +252,40 @@ ], "cell_type": "markdown" }, + { + "metadata": {}, + "source": [ + "## Temporal Audio Annotations\n", + "\n", + "Labelbox supports temporal annotations for audio/video with frame-level precision using the new temporal classification API.\n", + "\n", + "### Key Features:\n", + "- **Frame-based timing**: All annotations use millisecond precision\n", + "- **Deep nesting**: Support for multi-level nested classifications (Text > Text > Text, Radio > Radio > Radio, etc.)\n", + "- **Inductive structures**: Multiple parent values can share nested classifications that are automatically split based on frame overlap\n", + "- **Frame validation**: Frames start at 1 (not 0) and must be non-overlapping for Text and Radio siblings\n", + "\n", + "### Important Constraints:\n", + "1. **Frame indexing**: Frames are 1-based (frame 0 is invalid)\n", + "2. **Non-overlapping siblings**: Text and Radio classifications at the same level cannot have overlapping frame ranges\n", + "3. 
**Overlapping checklists**: Only Checklist answers can have overlapping frame ranges with their siblings" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "# Define tokens with precise timing (from demo script)\ntokens_data = [\n (\"Hello\", 586, 770), # Hello: frames 586-770\n (\"AI\", 771, 955), # AI: frames 771-955\n (\"how\", 956, 1140), # how: frames 956-1140\n (\"are\", 1141, 1325), # are: frames 1141-1325\n (\"you\", 1326, 1510), # you: frames 1326-1510\n (\"doing\", 1511, 1695), # doing: frames 1511-1695\n (\"today\", 1696, 1880), # today: frames 1696-1880\n]\n\n# Create temporal annotations for each token\ntemporal_annotations = []\nfor token, start_frame, end_frame in tokens_data:\n token_annotation = lb_types.AudioClassificationAnnotation(\n frame=start_frame,\n end_frame=end_frame,\n name=\"User Speaker\",\n value=lb_types.Text(answer=token),\n )\n temporal_annotations.append(token_annotation)\n\nprint(f\"Created {len(temporal_annotations)} temporal token annotations\")", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": "# Create label with both regular and temporal annotations\nlabel_with_temporal = []\nlabel_with_temporal.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[text_annotation, checklist_annotation, radio_annotation] +\n temporal_annotations,\n ))\n\nprint(\n f\"Created label with {len(label_with_temporal[0].annotations)} total annotations\"\n)\nprint(\" - Regular annotations: 3\")\nprint(f\" - Temporal annotations: {len(temporal_annotations)}\")", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, { "metadata": {}, "source": [ @@ -260,6 +294,13 @@ ], "cell_type": "markdown" }, + { + "metadata": {}, + "source": "# Upload temporal annotations via MAL\ntemporal_upload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=f\"temporal_mal_job-{str(uuid.uuid4())}\",\n predictions=label_with_temporal,\n)\n\ntemporal_upload_job.wait_until_done()\nprint(\"Temporal upload completed!\")\nprint(\"Errors:\", temporal_upload_job.errors)\nprint(\"Status:\", temporal_upload_job.statuses)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, { "metadata": {}, "source": "# Upload our label using Model-Assisted Labeling\nupload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=f\"mal_job-{str(uuid.uuid4())}\",\n predictions=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", diff --git a/libs/labelbox/src/labelbox/data/annotation_types/__init__.py b/libs/labelbox/src/labelbox/data/annotation_types/__init__.py index fc75652cf..addfb8836 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/__init__.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/__init__.py @@ -19,6 +19,10 @@ from .video import MaskInstance from .video import VideoMaskAnnotation +from .temporal import TemporalClassificationText +from .temporal import TemporalClassificationQuestion +from .temporal import TemporalClassificationAnswer + from .ner import ConversationEntity from .ner import DocumentEntity from .ner import DocumentTextSelection @@ -59,3 +63,70 @@ MessageRankingTask, MessageEvaluationTaskAnnotation, ) + +__all__ = [ + # Geometry + "Line", + "Point", + "Mask", + "Polygon", + "Rectangle", + "Geometry", + "DocumentRectangle", + "RectangleUnit", + # Annotation + 
"ClassificationAnnotation", + "ObjectAnnotation", + # Relationship + "RelationshipAnnotation", + "Relationship", + # Video + "VideoClassificationAnnotation", + "VideoObjectAnnotation", + "MaskFrame", + "MaskInstance", + "VideoMaskAnnotation", + # Temporal + "TemporalClassificationText", + "TemporalClassificationQuestion", + "TemporalClassificationAnswer", + # NER + "ConversationEntity", + "DocumentEntity", + "DocumentTextSelection", + "TextEntity", + # Classification + "Checklist", + "ClassificationAnswer", + "Radio", + "Text", + # Data + "GenericDataRowData", + "MaskData", + # Label + "Label", + "LabelGenerator", + # Metrics + "ScalarMetric", + "ScalarMetricAggregation", + "ConfusionMatrixMetric", + "ConfusionMatrixAggregation", + "ScalarMetricValue", + "ConfusionMatrixMetricValue", + # Tiled Image + "EPSG", + "EPSGTransformer", + "TiledBounds", + "TiledImageData", + "TileLayer", + # LLM Prompt Response + "PromptText", + "PromptClassificationAnnotation", + # MMC + "MessageInfo", + "OrderedMessageInfo", + "MessageSingleSelectionTask", + "MessageMultiSelectionTask", + "MessageRankingTask", + "MessageEvaluationTaskAnnotation", +] diff --git a/libs/labelbox/src/labelbox/data/annotation_types/label.py b/libs/labelbox/src/labelbox/data/annotation_types/label.py index d13fb8f20..2650e2e06 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/label.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/label.py @@ -13,6 +13,10 @@ from .metrics import ScalarMetric, ConfusionMatrixMetric from .video import VideoClassificationAnnotation from .video import VideoObjectAnnotation, VideoMaskAnnotation +from .temporal import ( + TemporalClassificationText, + TemporalClassificationQuestion, +) from .mmc import MessageEvaluationTaskAnnotation from pydantic import BaseModel, field_validator @@ -44,6 +48,8 @@ class Label(BaseModel): ClassificationAnnotation, ObjectAnnotation, VideoMaskAnnotation, + TemporalClassificationText, + TemporalClassificationQuestion, ScalarMetric, ConfusionMatrixMetric, RelationshipAnnotation, @@ -63,8 +69,22 @@ def validate_data(cls, data): def object_annotations(self) -> List[ObjectAnnotation]: return self._get_annotations_by_type(ObjectAnnotation) - def classification_annotations(self) -> List[ClassificationAnnotation]: - return self._get_annotations_by_type(ClassificationAnnotation) + def classification_annotations( + self, + ) -> List[ + Union[ + ClassificationAnnotation, + TemporalClassificationText, + TemporalClassificationQuestion, + ] + ]: + return self._get_annotations_by_type( + ( + ClassificationAnnotation, + TemporalClassificationText, + TemporalClassificationQuestion, + ) + ) def _get_annotations_by_type(self, annotation_type): return [ @@ -75,7 +95,30 @@ def _get_annotations_by_type(self, annotation_type): def frame_annotations( self, - ) -> Dict[str, Union[VideoObjectAnnotation, VideoClassificationAnnotation]]: + ) -> Dict[ + Union[int, None], + List[ + Union[ + VideoObjectAnnotation, + VideoClassificationAnnotation, + TemporalClassificationText, + TemporalClassificationQuestion, + ] + ], + ]: + """Get temporal annotations organized by frame + + Returns: + Dict[int, List]: Dictionary mapping frame (milliseconds) to list of temporal annotations + + Example: + >>> label.frame_annotations() + {2500: [VideoClassificationAnnotation(...), TemporalClassificationText(...)]} + + Note: + For TemporalClassificationText/Question, returns dictionary mapping to start of first frame range. + These annotations may have multiple discontinuous frame ranges. 
+ """ frame_dict = defaultdict(list) for annotation in self.annotations: if isinstance( @@ -83,7 +126,27 @@ def frame_annotations( (VideoObjectAnnotation, VideoClassificationAnnotation), ): frame_dict[annotation.frame].append(annotation) - return frame_dict + elif isinstance( + annotation, + (TemporalClassificationText, TemporalClassificationQuestion), + ): + # For temporal annotations with multiple values/answers, use first frame + if ( + isinstance(annotation, TemporalClassificationText) + and annotation.value + ): + frame_dict[annotation.value[0][0]].append( + annotation + ) # value[0][0] is start_frame + elif ( + isinstance(annotation, TemporalClassificationQuestion) + and annotation.value + ): + if annotation.value[0].frames: + frame_dict[annotation.value[0].frames[0][0]].append( + annotation + ) # frames[0][0] is start_frame + return dict(frame_dict) def add_url_to_masks(self, signer) -> "Label": """ diff --git a/libs/labelbox/src/labelbox/data/annotation_types/temporal.py b/libs/labelbox/src/labelbox/data/annotation_types/temporal.py new file mode 100644 index 000000000..d52656859 --- /dev/null +++ b/libs/labelbox/src/labelbox/data/annotation_types/temporal.py @@ -0,0 +1,197 @@ +""" +Temporal classification annotations for audio, video, and other time-based media. + +These classes provide a unified, recursive structure for temporal annotations with +frame-level precision. All temporal classifications support nested hierarchies. +""" + +from typing import List, Optional, Tuple, Union +from pydantic import BaseModel, Field + + +class TemporalClassificationAnswer(BaseModel): + """ + Temporal answer for Radio/Checklist questions with frame ranges. + + Represents a single answer option that can exist at multiple discontinuous + time ranges and contain nested classifications. + + Args: + name (str): Name of the answer option + frames (List[Tuple[int, int]]): List of (start_frame, end_frame) ranges in milliseconds + classifications (Optional[List[Union[TemporalClassificationText, TemporalClassificationQuestion]]]): + Nested classifications within this answer + + Example: + >>> # Radio answer with nested classifications + >>> answer = TemporalClassificationAnswer( + >>> name="user", + >>> frames=[(200, 1600)], + >>> classifications=[ + >>> TemporalClassificationQuestion( + >>> name="tone", + >>> answers=[ + >>> TemporalClassificationAnswer( + >>> name="professional", + >>> frames=[(1000, 1600)] + >>> ) + >>> ] + >>> ) + >>> ] + >>> ) + """ + + name: str + frames: List[Tuple[int, int]] = Field( + default_factory=list, + description="List of (start_frame, end_frame) tuples in milliseconds", + ) + classifications: Optional[ + List[ + Union[ + "TemporalClassificationText", "TemporalClassificationQuestion" + ] + ] + ] = None + + +class TemporalClassificationText(BaseModel): + """ + Temporal text classification with multiple text values at different frame ranges. + + Allows multiple text annotations at different time segments, each with precise + frame ranges. Supports recursive nesting of text and question classifications. 
+ + Args: + name (str): Name of the text classification + value (List[Tuple[int, int, str]]): List of (start_frame, end_frame, text_value) tuples + classifications (Optional[List[Union[TemporalClassificationText, TemporalClassificationQuestion]]]): + Nested classifications + + Example: + >>> # Simple text with multiple temporal values + >>> transcription = TemporalClassificationText( + >>> name="transcription", + >>> value=[ + >>> (1600, 2000, "Hello, how can I help you?"), + >>> (2500, 3000, "Thank you for calling!"), + >>> ] + >>> ) + >>> + >>> # Text with nested classifications + >>> transcription_with_notes = TemporalClassificationText( + >>> name="transcription", + >>> value=[ + >>> (1600, 2000, "Hello, how can I help you?"), + >>> ], + >>> classifications=[ + >>> TemporalClassificationText( + >>> name="speaker_notes", + >>> value=[ + >>> (1600, 2000, "Polite greeting"), + >>> ] + >>> ) + >>> ] + >>> ) + """ + + name: str + value: List[Tuple[int, int, str]] = Field( + default_factory=list, + description="List of (start_frame, end_frame, text_value) tuples", + ) + classifications: Optional[ + List[ + Union[ + "TemporalClassificationText", "TemporalClassificationQuestion" + ] + ] + ] = None + + +class TemporalClassificationQuestion(BaseModel): + """ + Temporal Radio/Checklist question with multiple answer options. + + Represents a question with one or more answer options, each having their own + frame ranges. Radio questions have a single answer, Checklist can have multiple. + + Args: + name (str): Name of the question/classification + value (List[TemporalClassificationAnswer]): List of answer options with frame ranges + classifications (Optional[List[Union[TemporalClassificationText, TemporalClassificationQuestion]]]): + Nested classifications (typically not used at question level) + + Note: + - Radio: Single answer in the value list + - Checklist: Multiple answers in the value list + The serializer automatically handles the distinction based on the number of answers. 
+ + Example: + >>> # Radio question (single answer) + >>> speaker = TemporalClassificationQuestion( + >>> name="speaker", + >>> value=[ + >>> TemporalClassificationAnswer( + >>> name="user", + >>> frames=[(200, 1600)] + >>> ) + >>> ] + >>> ) + >>> + >>> # Checklist question (multiple answers) + >>> audio_quality = TemporalClassificationQuestion( + >>> name="audio_quality", + >>> value=[ + >>> TemporalClassificationAnswer( + >>> name="background_noise", + >>> frames=[(0, 1500), (2000, 3000)] + >>> ), + >>> TemporalClassificationAnswer( + >>> name="echo", + >>> frames=[(2200, 2900)] + >>> ) + >>> ] + >>> ) + >>> + >>> # Nested structure: Radio > Radio > Radio + >>> speaker_with_tone = TemporalClassificationQuestion( + >>> name="speaker", + >>> value=[ + >>> TemporalClassificationAnswer( + >>> name="user", + >>> frames=[(200, 1600)], + >>> classifications=[ + >>> TemporalClassificationQuestion( + >>> name="tone", + >>> value=[ + >>> TemporalClassificationAnswer( + >>> name="professional", + >>> frames=[(1000, 1600)] + >>> ) + >>> ] + >>> ) + >>> ] + >>> ) + >>> ] + >>> ) + """ + + name: str + value: List[TemporalClassificationAnswer] = Field( + default_factory=list, + description="List of temporal answer options", + ) + classifications: Optional[ + List[ + Union[ + "TemporalClassificationText", "TemporalClassificationQuestion" + ] + ] + ] = None + + +# Update forward references for recursive types +TemporalClassificationAnswer.model_rebuild() +TemporalClassificationText.model_rebuild() +TemporalClassificationQuestion.model_rebuild() diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py index 2f4799d13..39deafa64 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py @@ -24,6 +24,12 @@ VideoMaskAnnotation, VideoObjectAnnotation, ) +from typing import List +from ...annotation_types.temporal import ( + TemporalClassificationText, + TemporalClassificationQuestion, +) +from .temporal import create_temporal_ndjson_classifications from labelbox.types import DocumentRectangle, DocumentEntity from .classification import ( NDChecklistSubclass, @@ -69,6 +75,7 @@ def from_common( yield from cls._create_relationship_annotations(label) yield from cls._create_non_video_annotations(label) yield from cls._create_video_annotations(label) + yield from cls._create_temporal_classifications(label) @staticmethod def _get_consecutive_frames( @@ -159,6 +166,33 @@ def _create_video_annotations( segments.append(segment) yield NDObject.from_common(segments, label.data) + @classmethod + def _create_temporal_classifications( + cls, label: Label + ) -> Generator[BaseModel, None, None]: + """Create temporal annotations with nested classifications using new temporal classes.""" + # Extract temporal annotations from the label + temporal_annotations = [ + annot + for annot in label.annotations + if isinstance( + annot, + (TemporalClassificationText, TemporalClassificationQuestion), + ) + ] + + if not temporal_annotations: + return + + # Use the new temporal serializer to create NDJSON annotations + ndjson_annotations = create_temporal_ndjson_classifications( + temporal_annotations, label.data.global_key + ) + + # Yield each NDJSON annotation + for annotation in ndjson_annotations: + yield annotation + @classmethod def _create_non_video_annotations(cls, label: Label): non_video_annotations = [ @@ -170,6 +204,8 @@ def _create_non_video_annotations(cls, label: 
Label): VideoClassificationAnnotation, VideoObjectAnnotation, VideoMaskAnnotation, + TemporalClassificationText, + TemporalClassificationQuestion, RelationshipAnnotation, ), ) @@ -187,7 +223,7 @@ def _create_non_video_annotations(cls, label: Label): yield NDMessageTask.from_common(annotation, label.data) else: raise TypeError( - f"Unable to convert object to MAL format. `{type(getattr(annotation, 'value',annotation))}`" + f"Unable to convert object to MAL format. `{type(getattr(annotation, 'value', annotation))}`" ) @classmethod diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/temporal.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/temporal.py new file mode 100644 index 000000000..eb281fdd1 --- /dev/null +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/temporal.py @@ -0,0 +1,418 @@ +""" +Temporal NDJSON serialization for new temporal classification structure. + +Handles TemporalClassificationText, TemporalClassificationQuestion, and TemporalClassificationAnswer +with frame validation and recursive nesting support. +""" + +import logging +from collections import defaultdict +from typing import Any, Dict, List, Tuple, Union +from pydantic import BaseModel + +from ...annotation_types.temporal import ( + TemporalClassificationText, + TemporalClassificationQuestion, +) + +logger = logging.getLogger(__name__) + + +class TemporalNDJSON(BaseModel): + """NDJSON structure for temporal annotations""" + + name: str + answer: List[Dict[str, Any]] + dataRow: Dict[str, str] + + +def create_temporal_ndjson_classifications( + annotations: List[ + Union[TemporalClassificationText, TemporalClassificationQuestion] + ], + data_global_key: str, +) -> List[TemporalNDJSON]: + """ + Create NDJSON temporal annotations from new temporal classification types. + + Args: + annotations: List of TemporalClassificationText or TemporalClassificationQuestion + data_global_key: Global key for the data row + + Returns: + List of TemporalNDJSON objects ready for serialization + """ + if not annotations: + return [] + + # Group by classification name + groups = defaultdict(list) + for ann in annotations: + groups[ann.name].append(ann) + + results = [] + for group_key, group_anns in groups.items(): + # Get display name (prefer first non-empty name) + display_name = next((a.name for a in group_anns if a.name), group_key) + + # Process based on annotation type + first_ann = group_anns[0] + + if isinstance(first_ann, TemporalClassificationText): + answers = _process_text_group(group_anns, parent_frames=None) + elif isinstance(first_ann, TemporalClassificationQuestion): + answers = _process_question_group(group_anns, parent_frames=None) + else: + logger.warning( + f"Unknown temporal annotation type: {type(first_ann)}" + ) + continue + + if answers: # Only add if we have valid answers + results.append( + TemporalNDJSON( + name=display_name, + answer=answers, + dataRow={"globalKey": data_global_key}, + ) + ) + + return results + + +def _process_text_group( + annotations: List[TemporalClassificationText], + parent_frames: List[Tuple[int, int]] = None, +) -> List[Dict[str, Any]]: + """ + Process TemporalClassificationText annotations. + + Each annotation can have multiple (start, end, text) tuples. + Groups by text value and merges frames. + + Nested classifications are assigned to text values based on frame overlap. 
+ """ + # Collect all text values with their frames + text_data = defaultdict(lambda: {"frames": []}) + + # Collect all nested classifications from all annotations + all_nested_classifications = [] + + for ann in annotations: + for start, end, text_value in ann.value: + # Validate frames against parent if provided + if parent_frames and not _is_frame_subset( + [(start, end)], parent_frames + ): + logger.warning( + f"Text value frames ({start}, {end}) not subset of parent frames {parent_frames}. Discarding." + ) + continue + + text_data[text_value]["frames"].append({"start": start, "end": end}) + + # Collect nested classifications at annotation level (not per text value) + if ann.classifications: + all_nested_classifications.extend(ann.classifications) + + # Track which nested classifications were assigned + assigned_nested = set() + + # Build results + results = [] + for text_value, data in text_data.items(): + # Deduplicate frames + unique_frames = _deduplicate_frames(data["frames"]) + + entry = { + "value": text_value, + "frames": unique_frames, + } + + # Assign nested classifications based on frame overlap + if all_nested_classifications: + parent_frame_tuples = [ + (f["start"], f["end"]) for f in unique_frames + ] + # Filter nested classifications that overlap with this text value's frames + relevant_nested = _filter_classifications_by_overlap( + all_nested_classifications, parent_frame_tuples + ) + if relevant_nested: + # Track that these were assigned + for cls in relevant_nested: + assigned_nested.add(id(cls)) + + # Pass ONLY THIS text value's frames so nested answers are filtered correctly + nested = _process_nested_classifications( + relevant_nested, parent_frame_tuples + ) + if nested: + entry["classifications"] = nested + + results.append(entry) + + # Log orphaned nested classifications (not assigned to any parent) + if all_nested_classifications: + for cls in all_nested_classifications: + if id(cls) not in assigned_nested: + if isinstance(cls, TemporalClassificationText): + frames_info = cls.value[0][:2] if cls.value else "no frames" + elif isinstance(cls, TemporalClassificationQuestion): + frames_info = ( + cls.value[0].frames + if cls.value and cls.value[0].frames + else "no frames" + ) + else: + frames_info = "unknown" + logger.warning( + f"Orphaned nested classification '{cls.name}' with frames {frames_info} - " + f"no parent text value found with overlapping frames." + ) + + return results + + +def _process_question_group( + annotations: List[TemporalClassificationQuestion], + parent_frames: List[Tuple[int, int]] = None, +) -> List[Dict[str, Any]]: + """ + Process TemporalClassificationQuestion annotations. + + Each annotation has a list of TemporalClassificationAnswer objects. + Groups by answer name and merges frames. + + Nested classifications are assigned to answers based on frame overlap. 
+ """ + # Collect all answers with their frames + answer_data = defaultdict(lambda: {"frames": []}) + + # Collect all nested classifications from all answers + all_nested_by_answer = defaultdict(list) + + for ann in annotations: + for answer in ann.value: # value contains list of answers + # Validate and collect frames + valid_frames = [] + for start, end in answer.frames: + # If parent_frames provided, check if answer frames are subset of ANY parent frame + # A child frame is a subset if: parent_start <= child_start AND child_end <= parent_end + if parent_frames: + is_valid = False + for parent_start, parent_end in parent_frames: + if parent_start <= start and end <= parent_end: + is_valid = True + break + if not is_valid: + # Don't log here - this is expected when processing inductive structures + # Only log orphaned classifications that are never assigned to any parent + continue + valid_frames.append({"start": start, "end": end}) + + if valid_frames: # Only add if we have valid frames + answer_data[answer.name]["frames"].extend(valid_frames) + + # Collect nested classifications at answer level + if answer.classifications: + all_nested_by_answer[answer.name].extend( + answer.classifications + ) + + # Track which nested classifications were assigned + assigned_nested = set() + + # Build results + results = [] + for answer_name, data in answer_data.items(): + # Deduplicate frames + unique_frames = _deduplicate_frames(data["frames"]) + + if not unique_frames: # Skip if no valid frames + continue + + entry = { + "name": answer_name, + "frames": unique_frames, + } + + # Assign nested classifications based on frame overlap + if all_nested_by_answer[answer_name]: + parent_frame_tuples = [ + (f["start"], f["end"]) for f in unique_frames + ] + # Filter nested classifications that overlap with this answer's frames + relevant_nested = _filter_classifications_by_overlap( + all_nested_by_answer[answer_name], parent_frame_tuples + ) + if relevant_nested: + # Track that these were assigned + for cls in relevant_nested: + assigned_nested.add(id(cls)) + + nested = _process_nested_classifications( + relevant_nested, parent_frame_tuples + ) + if nested: + entry["classifications"] = nested + + results.append(entry) + + # Log orphaned nested classifications (not assigned to any answer) + for answer_name, nested_list in all_nested_by_answer.items(): + for cls in nested_list: + if id(cls) not in assigned_nested: + if isinstance(cls, TemporalClassificationText): + frames_info = cls.value[0][:2] if cls.value else "no frames" + elif isinstance(cls, TemporalClassificationQuestion): + frames_info = ( + cls.value[0].frames + if cls.value and cls.value[0].frames + else "no frames" + ) + else: + frames_info = "unknown" + logger.warning( + f"Orphaned nested classification '{cls.name}' in answer '{answer_name}' with frames {frames_info} - " + f"no overlapping frames found with parent answer." + ) + + return results + + +def _process_nested_classifications( + classifications: List[ + Union[TemporalClassificationText, TemporalClassificationQuestion] + ], + parent_frames: List[Tuple[int, int]], +) -> List[Dict[str, Any]]: + """ + Process nested classifications recursively. + + Groups by name and processes each group. 
+ """ + # Group by name + groups = defaultdict(list) + for cls in classifications: + groups[cls.name].append(cls) + + results = [] + for group_key, group_items in groups.items(): + # Get display name + display_name = next((c.name for c in group_items if c.name), group_key) + + # Process based on type + first_item = group_items[0] + + if isinstance(first_item, TemporalClassificationText): + answers = _process_text_group(group_items, parent_frames) + elif isinstance(first_item, TemporalClassificationQuestion): + answers = _process_question_group(group_items, parent_frames) + else: + logger.warning( + f"Unknown nested classification type: {type(first_item)}" + ) + continue + + if answers: # Only add if we have valid answers + results.append( + { + "name": display_name, + "answer": answers, + } + ) + + return results + + +def _filter_classifications_by_overlap( + classifications: List[ + Union[TemporalClassificationText, TemporalClassificationQuestion] + ], + parent_frames: List[Tuple[int, int]], +) -> List[Union[TemporalClassificationText, TemporalClassificationQuestion]]: + """ + Filter classifications to only include those with frames that overlap with parent frames. + + A classification is included if ANY of its frame ranges overlap with ANY parent frame range. + """ + relevant = [] + + for cls in classifications: + has_overlap = False + + # Check frames based on classification type + if isinstance(cls, TemporalClassificationText): + # Check text value frames + for start, end, _ in cls.value: + if _frames_overlap([(start, end)], parent_frames): + has_overlap = True + break + elif isinstance(cls, TemporalClassificationQuestion): + # Check answer frames + for answer in cls.value: + if _frames_overlap(answer.frames, parent_frames): + has_overlap = True + break + + if has_overlap: + relevant.append(cls) + + return relevant + + +def _frames_overlap( + frames1: List[Tuple[int, int]], + frames2: List[Tuple[int, int]], +) -> bool: + """ + Check if any frame in frames1 overlaps with any frame in frames2. + + Two frames (s1, e1) and (s2, e2) overlap if: + max(s1, s2) <= min(e1, e2) + """ + for start1, end1 in frames1: + for start2, end2 in frames2: + if max(start1, start2) <= min(end1, end2): + return True + return False + + +def _is_frame_subset( + child_frames: List[Tuple[int, int]], + parent_frames: List[Tuple[int, int]], +) -> bool: + """ + Check if all child frames are subsets of at least one parent frame. + + A child frame (cs, ce) is a subset of parent frame (ps, pe) if: + ps <= cs and ce <= pe + """ + for child_start, child_end in child_frames: + is_subset = False + for parent_start, parent_end in parent_frames: + if parent_start <= child_start and child_end <= parent_end: + is_subset = True + break + + if not is_subset: + return False # At least one child frame is not a subset + + return True + + +def _deduplicate_frames(frames: List[Dict[str, int]]) -> List[Dict[str, int]]: + """ + Remove duplicate frame ranges. 
+ """ + seen = set() + unique = [] + + for frame in frames: + frame_tuple = (frame["start"], frame["end"]) + if frame_tuple not in seen: + seen.add(frame_tuple) + unique.append(frame) + + return unique diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_temporal.py b/libs/labelbox/tests/data/serialization/ndjson/test_temporal.py new file mode 100644 index 000000000..1c0a361c1 --- /dev/null +++ b/libs/labelbox/tests/data/serialization/ndjson/test_temporal.py @@ -0,0 +1,478 @@ +"""Tests for new temporal classification serialization""" + +import labelbox.types as lb_types +from labelbox.data.serialization.ndjson.temporal import ( + create_temporal_ndjson_classifications, +) + + +def test_temporal_text_simple(): + """Test simple TemporalClassificationText serialization""" + annotations = [ + lb_types.TemporalClassificationText( + name="transcription", + value=[ + (1000, 1100, "Hello"), + (1500, 2400, "How can I help you?"), + ], + ) + ] + + result = create_temporal_ndjson_classifications( + annotations, "test-global-key" + ) + + assert len(result) == 1 + assert result[0].name == "transcription" + assert len(result[0].answer) == 2 + + # Check first text value + answer0 = result[0].answer[0] + assert answer0["value"] == "Hello" + assert answer0["frames"] == [{"start": 1000, "end": 1100}] + + # Check second text value + answer1 = result[0].answer[1] + assert answer1["value"] == "How can I help you?" + assert answer1["frames"] == [{"start": 1500, "end": 2400}] + + +def test_temporal_question_radio(): + """Test TemporalClassificationQuestion with single answer (Radio)""" + annotations = [ + lb_types.TemporalClassificationQuestion( + name="speaker", + value=[ + lb_types.TemporalClassificationAnswer( + name="user", + frames=[(200, 1600)], + ) + ], + ) + ] + + result = create_temporal_ndjson_classifications( + annotations, "test-global-key" + ) + + assert len(result) == 1 + assert result[0].name == "speaker" + assert len(result[0].answer) == 1 + + answer = result[0].answer[0] + assert answer["name"] == "user" + assert answer["frames"] == [{"start": 200, "end": 1600}] + + +def test_temporal_question_checklist(): + """Test TemporalClassificationQuestion with multiple answers (Checklist)""" + annotations = [ + lb_types.TemporalClassificationQuestion( + name="audio_quality", + value=[ + lb_types.TemporalClassificationAnswer( + name="background_noise", + frames=[(0, 1500), (2000, 3000)], + ), + lb_types.TemporalClassificationAnswer( + name="echo", + frames=[(2200, 2900)], + ), + ], + ) + ] + + result = create_temporal_ndjson_classifications( + annotations, "test-global-key" + ) + + assert len(result) == 1 + assert result[0].name == "audio_quality" + assert len(result[0].answer) == 2 + + # Check background_noise answer + bg_noise = next( + a for a in result[0].answer if a["name"] == "background_noise" + ) + assert bg_noise["frames"] == [ + {"start": 0, "end": 1500}, + {"start": 2000, "end": 3000}, + ] + + # Check echo answer + echo = next(a for a in result[0].answer if a["name"] == "echo") + assert echo["frames"] == [{"start": 2200, "end": 2900}] + + +def test_temporal_text_nested(): + """Test TemporalClassificationText with nested classifications""" + annotations = [ + lb_types.TemporalClassificationText( + name="transcription", + value=[ + (1600, 2000, "Hello, how can I help you?"), + ], + classifications=[ + lb_types.TemporalClassificationText( + name="speaker_notes", + value=[ + (1600, 2000, "Polite greeting"), + ], + classifications=[ + lb_types.TemporalClassificationText( + 
name="context_tags", + value=[ + (1800, 2000, "customer service tone"), + ], + ) + ], + ) + ], + ) + ] + + result = create_temporal_ndjson_classifications( + annotations, "test-global-key" + ) + + assert len(result) == 1 + assert result[0].name == "transcription" + assert len(result[0].answer) == 1 + + answer = result[0].answer[0] + assert answer["value"] == "Hello, how can I help you?" + assert answer["frames"] == [{"start": 1600, "end": 2000}] + + # Check nested classifications + assert "classifications" in answer + assert len(answer["classifications"]) == 1 + + nested1 = answer["classifications"][0] + assert nested1["name"] == "speaker_notes" + assert len(nested1["answer"]) == 1 + assert nested1["answer"][0]["value"] == "Polite greeting" + + # Check deeper nesting + assert "classifications" in nested1["answer"][0] + nested2 = nested1["answer"][0]["classifications"][0] + assert nested2["name"] == "context_tags" + assert nested2["answer"][0]["value"] == "customer service tone" + + +def test_temporal_question_nested(): + """Test TemporalClassificationQuestion with nested classifications""" + annotations = [ + lb_types.TemporalClassificationQuestion( + name="speaker", + value=[ + lb_types.TemporalClassificationAnswer( + name="user", + frames=[(200, 1600)], + classifications=[ + lb_types.TemporalClassificationQuestion( + name="tone", + value=[ + lb_types.TemporalClassificationAnswer( + name="professional", + frames=[(1000, 1600)], + classifications=[ + lb_types.TemporalClassificationQuestion( + name="clarity", + value=[ + lb_types.TemporalClassificationAnswer( + name="clear", + frames=[(1300, 1600)], + ) + ], + ) + ], + ) + ], + ) + ], + ) + ], + ) + ] + + result = create_temporal_ndjson_classifications( + annotations, "test-global-key" + ) + + assert len(result) == 1 + answer = result[0].answer[0] + assert answer["name"] == "user" + + # Check nested tone + assert "classifications" in answer + tone = answer["classifications"][0] + assert tone["name"] == "tone" + assert tone["answer"][0]["name"] == "professional" + + # Check deeper nested clarity + clarity = tone["answer"][0]["classifications"][0] + assert clarity["name"] == "clarity" + assert clarity["answer"][0]["name"] == "clear" + assert clarity["answer"][0]["frames"] == [{"start": 1300, "end": 1600}] + + +def test_frame_validation_discard_invalid(): + """Test that invalid frames (not subset of parent) are discarded""" + annotations = [ + lb_types.TemporalClassificationQuestion( + name="speaker", + value=[ + lb_types.TemporalClassificationAnswer( + name="user", + frames=[(200, 1600)], # Parent range + classifications=[ + lb_types.TemporalClassificationText( + name="notes", + value=[ + (300, 800, "Valid note"), # Within parent range + ( + 1700, + 2000, + "Invalid note", + ), # Outside parent range + ], + ) + ], + ) + ], + ) + ] + + result = create_temporal_ndjson_classifications( + annotations, "test-global-key" + ) + + # Find the nested notes classification + answer = result[0].answer[0] + notes = answer["classifications"][0] + + # Only the valid note should be present + assert len(notes["answer"]) == 1 + assert notes["answer"][0]["value"] == "Valid note" + assert notes["answer"][0]["frames"] == [{"start": 300, "end": 800}] + + +def test_frame_deduplication(): + """Test that duplicate frames are removed""" + annotations = [ + lb_types.TemporalClassificationText( + name="transcription", + value=[ + (1000, 1100, "Hello"), + (1000, 1100, "Hello"), # Duplicate + ], + ) + ] + + result = create_temporal_ndjson_classifications( + annotations, 
"test-global-key" + ) + + # Should only have one entry + assert len(result[0].answer) == 1 + assert result[0].answer[0]["frames"] == [{"start": 1000, "end": 1100}] + + +def test_mixed_text_and_question_nesting(): + """Test Checklist > Text > Radio nesting""" + annotations = [ + lb_types.TemporalClassificationQuestion( + name="checklist_class", + value=[ + lb_types.TemporalClassificationAnswer( + name="quality_check", + frames=[(1, 1500)], + classifications=[ + lb_types.TemporalClassificationText( + name="notes_text", + value=[ + (1, 1500, "Audio quality is excellent"), + ], + classifications=[ + lb_types.TemporalClassificationQuestion( + name="severity_radio", + value=[ + lb_types.TemporalClassificationAnswer( + name="minor", + frames=[(1, 1500)], + ) + ], + ) + ], + ) + ], + ) + ], + ) + ] + + result = create_temporal_ndjson_classifications( + annotations, "test-global-key" + ) + + assert len(result) == 1 + answer = result[0].answer[0] + assert answer["name"] == "quality_check" + + # Check text classification + text_cls = answer["classifications"][0] + assert text_cls["name"] == "notes_text" + assert text_cls["answer"][0]["value"] == "Audio quality is excellent" + + # Check radio classification + radio_cls = text_cls["answer"][0]["classifications"][0] + assert radio_cls["name"] == "severity_radio" + assert radio_cls["answer"][0]["name"] == "minor" + + +def test_inductive_structure_text_with_shared_nested_radio(): + """ + Test inductive structure where multiple text values share the same nested radio classification. + + Each text value should get its own instance of the nested radio with only the radio answers + that overlap with that text value's frames. + """ + annotations = [ + lb_types.TemporalClassificationText( + name="content_notes", + value=[ + (1000, 1500, "Topic is relevant"), + (1501, 2000, "Good pacing"), + ], + classifications=[ + # Shared nested radio with answers for BOTH text values + lb_types.TemporalClassificationQuestion( + name="clarity_radio", + value=[ + lb_types.TemporalClassificationAnswer( + name="very_clear", + frames=[(1000, 1500)], + ), + lb_types.TemporalClassificationAnswer( + name="slightly_clear", + frames=[(1501, 2000)], + ), + ], + ) + ], + ) + ] + + result = create_temporal_ndjson_classifications( + annotations, "test-global-key" + ) + + assert len(result) == 1 + assert result[0].name == "content_notes" + assert len(result[0].answer) == 2 + + # Check first text value: "Topic is relevant" + text1 = next( + a for a in result[0].answer if a["value"] == "Topic is relevant" + ) + assert text1["frames"] == [{"start": 1000, "end": 1500}] + assert "classifications" in text1 + assert len(text1["classifications"]) == 1 + + # Should only have "very_clear" radio answer (overlaps with 1000-1500) + radio1 = text1["classifications"][0] + assert radio1["name"] == "clarity_radio" + assert len(radio1["answer"]) == 1 + assert radio1["answer"][0]["name"] == "very_clear" + assert radio1["answer"][0]["frames"] == [{"start": 1000, "end": 1500}] + + # Check second text value: "Good pacing" + text2 = next(a for a in result[0].answer if a["value"] == "Good pacing") + assert text2["frames"] == [{"start": 1501, "end": 2000}] + assert "classifications" in text2 + assert len(text2["classifications"]) == 1 + + # Should only have "slightly_clear" radio answer (overlaps with 1501-2000) + radio2 = text2["classifications"][0] + assert radio2["name"] == "clarity_radio" + assert len(radio2["answer"]) == 1 + assert radio2["answer"][0]["name"] == "slightly_clear" + assert 
radio2["answer"][0]["frames"] == [{"start": 1501, "end": 2000}] + + +def test_inductive_structure_checklist_with_multiple_text_values(): + """ + Test inductive structure with Checklist > Text > Radio where text has multiple values + and nested radio has answers that map to different text values. + """ + annotations = [ + lb_types.TemporalClassificationQuestion( + name="checklist_class", + value=[ + lb_types.TemporalClassificationAnswer( + name="content_check", + frames=[(1000, 2000)], + classifications=[ + lb_types.TemporalClassificationText( + name="content_notes_text", + value=[ + (1000, 1500, "Topic is relevant"), + (1501, 2000, "Good pacing"), + ], + classifications=[ + # Nested radio with multiple answers covering different text value frames + lb_types.TemporalClassificationQuestion( + name="clarity_radio", + value=[ + lb_types.TemporalClassificationAnswer( + name="very_clear", + frames=[(1000, 1500)], + ), + lb_types.TemporalClassificationAnswer( + name="slightly_clear", + frames=[(1501, 2000)], + ), + ], + ) + ], + ) + ], + ) + ], + ) + ] + + result = create_temporal_ndjson_classifications( + annotations, "test-global-key" + ) + + assert len(result) == 1 + assert result[0].name == "checklist_class" + + # Get the content_check answer + content_check = result[0].answer[0] + assert content_check["name"] == "content_check" + assert content_check["frames"] == [{"start": 1000, "end": 2000}] + + # Get the nested text classification + text_cls = content_check["classifications"][0] + assert text_cls["name"] == "content_notes_text" + assert len(text_cls["answer"]) == 2 + + # Check first text value and its nested radio + text1 = next( + a for a in text_cls["answer"] if a["value"] == "Topic is relevant" + ) + assert text1["frames"] == [{"start": 1000, "end": 1500}] + radio1 = text1["classifications"][0] + assert radio1["name"] == "clarity_radio" + assert len(radio1["answer"]) == 1 + assert radio1["answer"][0]["name"] == "very_clear" + + # Check second text value and its nested radio + text2 = next(a for a in text_cls["answer"] if a["value"] == "Good pacing") + assert text2["frames"] == [{"start": 1501, "end": 2000}] + radio2 = text2["classifications"][0] + assert radio2["name"] == "clarity_radio" + assert len(radio2["answer"]) == 1 + assert radio2["answer"][0]["name"] == "slightly_clear"