"""
Generic temporal annotation processor for frame-based media (video, audio).
"""
4+ from abc import ABC , abstractmethod
5+ from collections import defaultdict
6+ from typing import Any , Dict , Generator , List , Union
7+
8+ from ...annotation_types .annotation import ClassificationAnnotation , ObjectAnnotation
9+ from ...annotation_types .label import Label
10+ from .classification import NDClassificationType , NDClassification
11+ from .objects import NDObject
12+
13+
class TemporalAnnotationProcessor(ABC):
    """Abstract base class for processing temporal annotations (video, audio, etc.).

    Subclasses declare which annotation classes they handle and how a group
    of annotations sharing one feature is turned into frame ranges.
    ``process_annotations`` drives the shared group-then-serialize flow.
    """

    @abstractmethod
    def get_annotation_types(self) -> tuple:
        """Return the annotation types this processor handles.

        Returns:
            A 2-tuple ``(classification_types, object_types)``. Each element
            must itself be a tuple of classes, because the two are
            concatenated and passed to ``isinstance``.
        """
        pass

    @abstractmethod
    def should_group_annotations(self, annotation_group: List) -> bool:
        """Determine if annotations should be grouped into one feature.

        Args:
            annotation_group: Annotations that share one feature key.

        Returns:
            True to serialize the whole group as a single feature, False to
            serialize every annotation on its own.
        """
        pass

    @abstractmethod
    def build_frame_data(self, annotation_group: List) -> List[Dict[str, Any]]:
        """Extract frame data from an annotation group.

        Args:
            annotation_group: One or more annotations of the same feature.

        Returns:
            A list of frame dicts; it is stored under ``extra["frames"]`` of
            the serialized annotation.
        """
        pass

    @abstractmethod
    def prepare_grouped_content(self, annotation_group: List) -> Any:
        """Prepare content for grouped annotations (may modify annotation.value).

        Called after ``build_frame_data`` and before the first annotation of
        the group is serialized; the return value is ignored by this class.
        """
        pass

    def process_annotations(
        self, label: Label
    ) -> Generator[Union[NDClassificationType, Any], None, None]:
        """Main processing method - generic for all temporal media.

        Args:
            label: Label whose ``annotations`` are scanned and whose ``data``
                is forwarded to the serializers.

        Yields:
            Serialized classification and object annotations for every
            annotation type reported by ``get_annotation_types``.
        """
        temporal_annotations = defaultdict(list)
        classification_types, object_types = self.get_annotation_types()
        # Build the combined isinstance() tuple once instead of per annotation.
        handled_types = classification_types + object_types

        # Group annotations by feature schema id, falling back to the name.
        for annot in label.annotations:
            if isinstance(annot, handled_types):
                key = annot.feature_schema_id or annot.name
                temporal_annotations[key].append(annot)

        # The first element of a group decides how the whole group is handled.
        for annotation_group in temporal_annotations.values():
            if isinstance(annotation_group[0], classification_types):
                yield from self._process_classification_group(annotation_group, label.data)
            elif isinstance(annotation_group[0], object_types):
                yield from self._process_object_group(annotation_group, label.data)

    @staticmethod
    def _attach_frames(annotation, frames_data) -> None:
        """Store ``frames_data`` under ``extra["frames"]``, keeping other keys."""
        if not annotation.extra:
            annotation.extra = {}
        annotation.extra.update({"frames": frames_data})

    def _process_classification_group(self, annotation_group, data):
        """Process classification annotations.

        When ``should_group_annotations`` is true, the first annotation acts
        as the template for the whole group and receives the frame data of
        every member. Otherwise each annotation is serialized on its own.
        In both cases keys already present in ``annotation.extra`` are kept;
        only ``"frames"`` is added or replaced.
        """
        if self.should_group_annotations(annotation_group):
            # Group into a single feature with multiple keyframes.
            annotation = annotation_group[0]  # Use first as template

            frames_data = self.build_frame_data(annotation_group)

            # May rewrite the template's value (see prepare_grouped_content).
            self.prepare_grouped_content(annotation_group)

            # Merge instead of replacing so existing extra entries survive.
            self._attach_frames(annotation, frames_data)
            yield NDClassification.from_common(annotation, data)
        else:
            # Process individually.
            for annotation in annotation_group:
                frames_data = self.build_frame_data([annotation])
                if frames_data:
                    self._attach_frames(annotation, frames_data)
                yield NDClassification.from_common(annotation, data)

    def _process_object_group(self, annotation_group, data):
        """Process object annotations - default to individual processing."""
        for annotation in annotation_group:
            yield NDObject.from_common(annotation, data)
83+
84+
class AudioTemporalProcessor(TemporalAnnotationProcessor):
    """Temporal processor specialised for audio annotations."""

    def __init__(self,
                 group_text_annotations: bool = True,
                 enable_token_mapping: bool = True):
        # group_text_annotations gates should_group_annotations;
        # enable_token_mapping gates prepare_grouped_content.
        self.enable_token_mapping = enable_token_mapping
        self.group_text_annotations = group_text_annotations

    def get_annotation_types(self) -> tuple:
        """Return the audio classification and object annotation classes."""
        from ...annotation_types.audio import AudioClassificationAnnotation, AudioObjectAnnotation

        classification_types = (AudioClassificationAnnotation,)
        object_types = (AudioObjectAnnotation,)
        return classification_types, object_types

    def should_group_annotations(self, annotation_group: List) -> bool:
        """Tell whether a group is a multi-instance TEXT classification.

        Grouping requires the feature switch to be on, the first annotation
        to carry a ``Text`` value, more than one annotation in the group,
        and a ``frame`` attribute on every annotation.
        """
        if not self.group_text_annotations:
            return False

        from ...annotation_types.classification.classification import Text

        if not isinstance(annotation_group[0].value, Text):
            return False
        if len(annotation_group) <= 1:
            return False
        for candidate in annotation_group:
            if not hasattr(candidate, 'frame'):
                return False
        return True

    def build_frame_data(self, annotation_group: List) -> List[Dict[str, Any]]:
        """Collect one ``{"start", "end"}`` range per annotation that has a frame."""
        ranges = []
        for item in annotation_group:
            if not hasattr(item, 'frame'):
                continue
            start = item.frame
            # A missing or None end_frame collapses the range to the start frame.
            stop = getattr(item, 'end_frame', None)
            if stop is None:
                stop = start
            ranges.append({"start": start, "end": stop})
        return ranges

    def prepare_grouped_content(self, annotation_group: List) -> None:
        """Fold the text of a grouped annotation set into the first annotation.

        Does nothing unless the first annotation holds a ``Text`` value and
        token mapping is enabled. Otherwise the first annotation's value is
        replaced by a ``Text`` whose answer is a JSON document with the
        space-joined answers (``default_text``) and a frame-to-answer map
        (``token_mapping``).
        """
        from ...annotation_types.classification.classification import Text

        template = annotation_group[0]
        if not (isinstance(template.value, Text) and self.enable_token_mapping):
            return

        import json

        answers = []
        frame_to_answer = {}
        for member in annotation_group:
            answers.append(member.value.answer)
            # Later annotations on the same frame overwrite earlier ones.
            frame_to_answer[str(member.frame)] = member.value.answer

        payload = {
            "default_text": " ".join(answers),
            "token_mapping": frame_to_answer,
        }

        # The template annotation is mutated in place.
        template.value = Text(answer=json.dumps(payload))
140+
141+
class VideoTemporalProcessor(TemporalAnnotationProcessor):
    """Temporal processor for video annotations, intended to match existing behavior."""

    def get_annotation_types(self) -> tuple:
        """Return the video classification and object annotation classes."""
        from ...annotation_types.video import VideoClassificationAnnotation, VideoObjectAnnotation

        classification_types = (VideoClassificationAnnotation,)
        object_types = (VideoObjectAnnotation,)
        return classification_types, object_types

    def should_group_annotations(self, annotation_group: List) -> bool:
        """Video classifications are always grouped, whatever the group holds."""
        return True

    def build_frame_data(self, annotation_group: List) -> List[Dict[str, Any]]:
        """Turn each segment frame range into a ``{"start", "end"}`` dict.

        The ranges come from ``NDLabel._get_segment_frame_ranges``; the first
        and last element of every range become the start and end.
        """
        from .label import NDLabel  # Local import avoids a circular import

        frames_data = []
        for frame_range in NDLabel._get_segment_frame_ranges(annotation_group):
            frames_data.append({"start": frame_range[0], "end": frame_range[-1]})
        return frames_data

    def prepare_grouped_content(self, annotation_group: List) -> None:
        """No-op: video keeps the annotation value as it is."""
        return None

    def _process_object_group(self, annotation_group, data):
        """Serialize video objects as one feature made of keyframe segments.

        For every segment frame range, the keyframe annotations whose frame
        lies inside the (inclusive) range form one segment; all segments are
        passed to ``NDObject.from_common`` in a single call.
        """
        from .label import NDLabel

        segments = [
            [
                candidate
                for candidate in annotation_group
                if candidate.keyframe and start_frame <= candidate.frame <= end_frame
            ]
            for start_frame, end_frame in NDLabel._get_segment_frame_ranges(annotation_group)
        ]
        yield NDObject.from_common(segments, data)
0 commit comments