@@ -89,9 +89,14 @@ def _process_text_group(
8989
9090 Each annotation can have multiple (start, end, text) tuples.
9191 Groups by text value and merges frames.
92+
93+ Nested classifications are assigned to text values based on frame overlap.
9294 """
9395 # Collect all text values with their frames
94- text_data = defaultdict (lambda : {"frames" : [], "nested" : []})
96+ text_data = defaultdict (lambda : {"frames" : []})
97+
98+ # Collect all nested classifications from all annotations
99+ all_nested_classifications = []
95100
96101 for ann in annotations :
97102 for start , end , text_value in ann .value :
@@ -104,9 +109,12 @@ def _process_text_group(
104109
105110 text_data [text_value ]["frames" ].append ({"start" : start , "end" : end })
106111
107- # Collect nested classifications
108- if ann .classifications :
109- text_data [text_value ]["nested" ].extend (ann .classifications )
112+ # Collect nested classifications at annotation level (not per text value)
113+ if ann .classifications :
114+ all_nested_classifications .extend (ann .classifications )
115+
116+ # Track which nested classifications were assigned
117+ assigned_nested = set ()
110118
111119 # Build results
112120 results = []
@@ -119,15 +127,40 @@ def _process_text_group(
119127 "frames" : unique_frames ,
120128 }
121129
122- # Process nested classifications recursively
123- if data [ "nested" ] :
130+ # Assign nested classifications based on frame overlap
131+ if all_nested_classifications :
124132 parent_frame_tuples = [(f ["start" ], f ["end" ]) for f in unique_frames ]
125- nested = _process_nested_classifications (data ["nested" ], parent_frame_tuples )
126- if nested :
127- entry ["classifications" ] = nested
133+ # Filter nested classifications that overlap with this text value's frames
134+ relevant_nested = _filter_classifications_by_overlap (
135+ all_nested_classifications , parent_frame_tuples
136+ )
137+ if relevant_nested :
138+ # Track that these were assigned
139+ for cls in relevant_nested :
140+ assigned_nested .add (id (cls ))
141+
142+ # Pass ONLY THIS text value's frames so nested answers are filtered correctly
143+ nested = _process_nested_classifications (relevant_nested , parent_frame_tuples )
144+ if nested :
145+ entry ["classifications" ] = nested
128146
129147 results .append (entry )
130148
149+ # Log orphaned nested classifications (not assigned to any parent)
150+ if all_nested_classifications :
151+ for cls in all_nested_classifications :
152+ if id (cls ) not in assigned_nested :
153+ if isinstance (cls , TemporalClassificationText ):
154+ frames_info = cls .value [0 ][:2 ] if cls .value else "no frames"
155+ elif isinstance (cls , TemporalClassificationQuestion ):
156+ frames_info = cls .value [0 ].frames if cls .value and cls .value [0 ].frames else "no frames"
157+ else :
158+ frames_info = "unknown"
159+ logger .warning (
160+ f"Orphaned nested classification '{ cls .name } ' with frames { frames_info } - "
161+ f"no parent text value found with overlapping frames."
162+ )
163+
131164 return results
132165
133166
@@ -140,28 +173,43 @@ def _process_question_group(
140173
141174 Each annotation has a list of TemporalClassificationAnswer objects.
142175 Groups by answer name and merges frames.
176+
177+ Nested classifications are assigned to answers based on frame overlap.
143178 """
144- # Collect all answers
145- answer_data = defaultdict (lambda : {"frames" : [], "nested" : []})
179+ # Collect all answers with their frames
180+ answer_data = defaultdict (lambda : {"frames" : []})
181+
182+ # Collect all nested classifications from all answers
183+ all_nested_by_answer = defaultdict (list )
146184
147185 for ann in annotations :
148186 for answer in ann .value : # value contains list of answers
149187 # Validate and collect frames
150188 valid_frames = []
151189 for start , end in answer .frames :
152- if parent_frames and not _is_frame_subset ([(start , end )], parent_frames ):
153- logger .warning (
154- f"Answer '{ answer .name } ' frames ({ start } , { end } ) not subset of parent frames { parent_frames } . Discarding."
155- )
156- continue
190+ # If parent_frames provided, check if answer frames are subset of ANY parent frame
191+ # A child frame is a subset if: parent_start <= child_start AND child_end <= parent_end
192+ if parent_frames :
193+ is_valid = False
194+ for parent_start , parent_end in parent_frames :
195+ if parent_start <= start and end <= parent_end :
196+ is_valid = True
197+ break
198+ if not is_valid :
199+ # Don't log here - this is expected when processing inductive structures
200+ # Only log orphaned classifications that are never assigned to any parent
201+ continue
157202 valid_frames .append ({"start" : start , "end" : end })
158203
159204 if valid_frames : # Only add if we have valid frames
160205 answer_data [answer .name ]["frames" ].extend (valid_frames )
161206
162- # Collect nested classifications
207+ # Collect nested classifications at answer level
163208 if answer .classifications :
164- answer_data [answer .name ]["nested" ].extend (answer .classifications )
209+ all_nested_by_answer [answer .name ].extend (answer .classifications )
210+
211+ # Track which nested classifications were assigned
212+ assigned_nested = set ()
165213
166214 # Build results
167215 results = []
@@ -177,15 +225,39 @@ def _process_question_group(
177225 "frames" : unique_frames ,
178226 }
179227
180- # Process nested classifications recursively
181- if data [ "nested" ]:
228+ # Assign nested classifications based on frame overlap
229+ if all_nested_by_answer [ answer_name ]:
182230 parent_frame_tuples = [(f ["start" ], f ["end" ]) for f in unique_frames ]
183- nested = _process_nested_classifications (data ["nested" ], parent_frame_tuples )
184- if nested :
185- entry ["classifications" ] = nested
231+ # Filter nested classifications that overlap with this answer's frames
232+ relevant_nested = _filter_classifications_by_overlap (
233+ all_nested_by_answer [answer_name ], parent_frame_tuples
234+ )
235+ if relevant_nested :
236+ # Track that these were assigned
237+ for cls in relevant_nested :
238+ assigned_nested .add (id (cls ))
239+
240+ nested = _process_nested_classifications (relevant_nested , parent_frame_tuples )
241+ if nested :
242+ entry ["classifications" ] = nested
186243
187244 results .append (entry )
188245
246+ # Log orphaned nested classifications (not assigned to any answer)
247+ for answer_name , nested_list in all_nested_by_answer .items ():
248+ for cls in nested_list :
249+ if id (cls ) not in assigned_nested :
250+ if isinstance (cls , TemporalClassificationText ):
251+ frames_info = cls .value [0 ][:2 ] if cls .value else "no frames"
252+ elif isinstance (cls , TemporalClassificationQuestion ):
253+ frames_info = cls .value [0 ].frames if cls .value and cls .value [0 ].frames else "no frames"
254+ else :
255+ frames_info = "unknown"
256+ logger .warning (
257+ f"Orphaned nested classification '{ cls .name } ' in answer '{ answer_name } ' with frames { frames_info } - "
258+ f"no overlapping frames found with parent answer."
259+ )
260+
189261 return results
190262
191263
@@ -229,6 +301,57 @@ def _process_nested_classifications(
229301 return results
230302
231303
def _filter_classifications_by_overlap(
    classifications: List[Union[TemporalClassificationText, TemporalClassificationQuestion]],
    parent_frames: List[Tuple[int, int]],
) -> List[Union[TemporalClassificationText, TemporalClassificationQuestion]]:
    """
    Keep only the classifications whose frames intersect the parent frames.

    A classification is kept as soon as one of its own frame ranges overlaps
    one of the ranges in ``parent_frames`` (overlap is decided by
    ``_frames_overlap``). The relative order of the input is preserved.

    Args:
        classifications: Candidate nested classifications. Text
            classifications are read as ``(start, end, text)`` tuples in
            ``.value``; question classifications are read as answers in
            ``.value``, each exposing ``.frames``.
        parent_frames: ``(start, end)`` ranges of the prospective parent.

    Returns:
        A new list with the overlapping classifications. Items that are
        neither text nor question classifications are never included.
    """

    def _touches_parent(item) -> bool:
        # Text classification: test each (start, end) range on its own.
        if isinstance(item, TemporalClassificationText):
            return any(
                _frames_overlap([(begin, finish)], parent_frames)
                for begin, finish, _ in item.value
            )
        # Question classification: test each answer's whole frame list.
        if isinstance(item, TemporalClassificationQuestion):
            return any(
                _frames_overlap(option.frames, parent_frames)
                for option in item.value
            )
        # Unrecognised types carry no frames we know how to inspect.
        return False

    return [item for item in classifications if _touches_parent(item)]
336+
337+
338+ def _frames_overlap (
339+ frames1 : List [Tuple [int , int ]],
340+ frames2 : List [Tuple [int , int ]],
341+ ) -> bool :
342+ """
343+ Check if any frame in frames1 overlaps with any frame in frames2.
344+
345+ Two frames (s1, e1) and (s2, e2) overlap if:
346+ max(s1, s2) <= min(e1, e2)
347+ """
348+ for start1 , end1 in frames1 :
349+ for start2 , end2 in frames2 :
350+ if max (start1 , start2 ) <= min (end1 , end2 ):
351+ return True
352+ return False
353+
354+
232355def _is_frame_subset (
233356 child_frames : List [Tuple [int , int ]],
234357 parent_frames : List [Tuple [int , int ]],
0 commit comments