@@ -93,15 +93,15 @@ def group_by_value(self, annotations: List[TemporalAnnotation]) -> List[Dict[str
9393
9494 entries = []
9595 for _ , anns in value_buckets .items ():
96- first = anns [0 ]
9796 # Extract frames from each annotation (root frames)
9897 frames = [self .frame_extractor (a ) for a in anns ]
9998 frame_dicts = [{"start" : start , "end" : end } for start , end in frames ]
10099
101- # Get root frames for passing to nested classifications
100+ # Get root frames for passing to nested classifications (use first annotation's frames)
102101 root_frames = frames [0 ] if frames else (None , None )
103102
104- entry = self ._create_answer_entry (first , frame_dicts , root_frames )
103+ # Pass ALL annotations so we can merge their nested classifications
104+ entry = self ._create_answer_entry (anns , frame_dicts , root_frames )
105105 entries .append (entry )
106106
107107 return entries
@@ -138,49 +138,80 @@ def _get_nested_frames(self, obj: Any, parent_frames: List[Dict[str, int]], root
138138 # Use explicitly specified frames
139139 return [{"start" : obj .start_frame , "end" : obj .end_frame }]
140140 else :
141- # Default to root frames
142- if root_frames and root_frames [0 ] is not None and root_frames [1 ] is not None :
141+ # Default to parent frames first, then root frames
142+ if parent_frames :
143+ return parent_frames
144+ elif root_frames and root_frames [0 ] is not None and root_frames [1 ] is not None :
143145 return [{"start" : root_frames [0 ], "end" : root_frames [1 ]}]
144146 else :
145- # Fall back to parent frames if root not available
146- return parent_frames
147+ return []
147148
def _create_answer_entry(self, anns: List["TemporalAnnotation"], frames: List[Dict[str, int]], root_frames: Tuple[int, int]) -> Dict[str, Any]:
    """Create an answer entry from all annotations that share one value.

    The kind of answer is decided from the first annotation in the group:

    * Checklist (``value.answer`` is a list): one entry per distinct option
      name found across *all* annotations, ordered by name. If exactly one
      option exists its entry is returned directly; otherwise the entries
      are wrapped as ``{"options": [...], "frames": frames}``.
    * Radio (``value.answer`` has a ``name``): a single entry using the
      merged ``frames`` passed in by the caller.
    * Anything else (text): a single ``{"value": ..., "frames": frames}``
      entry; nesting is read from the annotation itself, not the answer.

    In every case the nested classifications of all annotations in the
    group are concatenated and serialized once.

    Args:
        anns: All annotations in the value group. Must be non-empty; the
            first element is indexed unconditionally.
        frames: List of frame dictionaries for this answer.
        root_frames: Tuple of (start, end) from the root
            AudioClassificationAnnotation.

    Returns:
        The serialized answer entry.
    """
    first_ann = anns[0]
    has_answer = hasattr(first_ann.value, "answer")

    if has_answer and isinstance(first_ann.value.answer, list):
        # Checklist: group frames and nested classifications by option name
        # in a single pass instead of rescanning every annotation once per
        # distinct option name.
        frames_by_option: Dict[str, List[Dict[str, int]]] = {}
        nested_by_option: Dict[str, List[Any]] = {}
        for ann in anns:
            options = getattr(ann.value, "answer", None)
            if not isinstance(options, list) or not options:
                continue
            # This annotation's own frame range is the parent range for each
            # of its options.
            ann_start, ann_end = self.frame_extractor(ann)
            for opt in options:
                # Build a fresh parent list per option so no frame dict is
                # shared between two different options' outputs.
                parent_frames = [{"start": ann_start, "end": ann_end}]
                frames_by_option.setdefault(opt.name, []).extend(
                    self._get_nested_frames(opt, parent_frames, root_frames)
                )
                option_nested = nested_by_option.setdefault(opt.name, [])
                opt_classifications = getattr(opt, "classifications", None)
                if opt_classifications:
                    option_nested.extend(opt_classifications)

        entries = []
        for opt_name in sorted(frames_by_option):  # Sort for consistent ordering
            entry = {"name": opt_name, "frames": frames_by_option[opt_name]}
            if nested_by_option[opt_name]:
                entry["classifications"] = self._serialize_explicit_classifications(
                    nested_by_option[opt_name], root_frames
                )
            entries.append(entry)
        if len(entries) == 1:
            return entries[0]
        return {"options": entries, "frames": frames}

    all_nested: List[Any] = []
    if has_answer and hasattr(first_ann.value.answer, "name"):
        # Radio: frames were already merged by the caller; nesting lives on
        # each annotation's answer object.
        entry = {"name": first_ann.value.answer.name, "frames": frames}
        for ann in anns:
            answer = getattr(ann.value, "answer", None)
            answer_classifications = getattr(answer, "classifications", None)
            if answer_classifications:
                all_nested.extend(answer_classifications)
    else:
        # Text - nesting is at the annotation level, not answer level
        entry = {"value": first_ann.value.answer, "frames": frames}
        for ann in anns:
            ann_classifications = getattr(ann, "classifications", None)
            if ann_classifications:
                all_nested.extend(ann_classifications)

    if all_nested:
        entry["classifications"] = self._serialize_explicit_classifications(all_nested, root_frames)
    return entry
185216
186217 def _serialize_explicit_classifications (self , classifications : List [Any ], root_frames : Tuple [int , int ]) -> List [Dict [str , Any ]]:
@@ -207,10 +238,12 @@ def _serialize_explicit_classifications(self, classifications: List[Any], root_f
207238 display_name = cls_list [0 ].name if cls_list [0 ].name else name
208239
209240 # Create answer entries for this nested classification
210- answers = []
241+ # De-duplicate by answer value
242+ seen_values = {} # value_key -> (answer_dict, nested_classifications)
211243 for cls in cls_list :
212244 # Get frames for this ClassificationAnnotation (from cls or root)
213245 cls_frames = self ._get_nested_frames (cls , [], root_frames )
246+ value_key = self ._get_value_key (cls )
214247
215248 if hasattr (cls .value , "answer" ):
216249 if isinstance (cls .value .answer , list ):
@@ -219,27 +252,78 @@ def _serialize_explicit_classifications(self, classifications: List[Any], root_f
219252 # Get frames for this checklist option (from opt or cls or root)
220253 opt_frames = self ._get_nested_frames (opt , cls_frames , root_frames )
221254 answer = {"name" : opt .name , "frames" : opt_frames }
222- # Recursively handle deeper nesting
255+ # Collect nested for recursion
256+ opt_nested = []
223257 if hasattr (opt , 'classifications' ) and opt .classifications :
224- answer ["classifications" ] = self ._serialize_explicit_classifications (opt .classifications , root_frames )
225- answers .append (answer )
258+ opt_nested = opt .classifications
259+ if opt_nested :
260+ answer ["classifications" ] = self ._serialize_explicit_classifications (opt_nested , root_frames )
261+ # Note: Checklist options don't need de-duplication
262+ # (they're already handled at the parent level)
263+ if value_key not in seen_values :
264+ seen_values [value_key ] = []
265+ seen_values [value_key ].append (answer )
226266 elif hasattr (cls .value .answer , "name" ):
227- # Radio
267+ # Radio - de-duplicate by name
228268 opt = cls .value .answer
269+ # Check if this answer has explicit frames
270+ has_explicit_frames = (hasattr (opt , 'start_frame' ) and opt .start_frame is not None and
271+ hasattr (opt , 'end_frame' ) and opt .end_frame is not None )
229272 # Get frames for this radio answer (from opt or cls or root)
230273 opt_frames = self ._get_nested_frames (opt , cls_frames , root_frames )
231- answer = {"name" : opt .name , "frames" : opt_frames }
232- # Recursively handle deeper nesting
233- if hasattr (opt , 'classifications' ) and opt .classifications :
234- answer ["classifications" ] = self ._serialize_explicit_classifications (opt .classifications , root_frames )
235- answers .append (answer )
274+
275+ # Check if we've already seen this answer name
276+ if value_key in seen_values :
277+ # Only merge frames if both have explicit frames, or neither does
278+ existing_has_explicit = seen_values [value_key ].get ("_has_explicit" , False )
279+ if has_explicit_frames and existing_has_explicit :
280+ # Both explicit - merge
281+ seen_values [value_key ]["frames" ].extend (opt_frames )
282+ elif has_explicit_frames and not existing_has_explicit :
283+ # Current is explicit, existing is implicit - replace with explicit
284+ seen_values [value_key ]["frames" ] = opt_frames
285+ seen_values [value_key ]["_has_explicit" ] = True
286+ elif not has_explicit_frames and existing_has_explicit :
287+ # Current is implicit, existing is explicit - keep existing (don't merge)
288+ pass
289+ else :
290+ # Both implicit - merge
291+ seen_values [value_key ]["frames" ].extend (opt_frames )
292+
293+ # Always merge nested classifications
294+ if hasattr (opt , 'classifications' ) and opt .classifications :
295+ seen_values [value_key ]["_nested" ].extend (opt .classifications )
296+ else :
297+ answer = {"name" : opt .name , "frames" : opt_frames , "_nested" : [], "_has_explicit" : has_explicit_frames }
298+ if hasattr (opt , 'classifications' ) and opt .classifications :
299+ answer ["_nested" ] = list (opt .classifications )
300+ seen_values [value_key ] = answer
236301 else :
237302 # Text - check for annotation-level nesting
238303 answer = {"value" : cls .value .answer , "frames" : cls_frames }
239- # Recursively handle deeper nesting at ClassificationAnnotation level
304+ # Collect nested
305+ text_nested = []
240306 if hasattr (cls , 'classifications' ) and cls .classifications :
241- answer ["classifications" ] = self ._serialize_explicit_classifications (cls .classifications , root_frames )
242- answers .append (answer )
307+ text_nested = cls .classifications
308+ if text_nested :
309+ answer ["classifications" ] = self ._serialize_explicit_classifications (text_nested , root_frames )
310+ if value_key not in seen_values :
311+ seen_values [value_key ] = []
312+ seen_values [value_key ].append (answer )
313+
314+ # Convert seen_values to answers list
315+ answers = []
316+ for value_key , value_data in seen_values .items ():
317+ if isinstance (value_data , list ):
318+ answers .extend (value_data )
319+ else :
320+ # Radio case - handle nested classifications
321+ if value_data .get ("_nested" ):
322+ value_data ["classifications" ] = self ._serialize_explicit_classifications (value_data ["_nested" ], root_frames )
323+ # Clean up internal fields
324+ value_data .pop ("_nested" , None )
325+ value_data .pop ("_has_explicit" , None )
326+ answers .append (value_data )
243327
244328 result .append ({
245329 "name" : display_name ,
0 commit comments