Skip to content

Commit 15bb17b

Browse files
chore: final nail
1 parent fb209f0 commit 15bb17b

File tree

2 files changed

+148
-25
lines changed

2 files changed

+148
-25
lines changed

libs/labelbox/src/labelbox/data/serialization/ndjson/label.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def from_common(
7676
yield from cls._create_relationship_annotations(label)
7777
yield from cls._create_non_video_annotations(label)
7878
yield from cls._create_video_annotations(label)
79-
yield from cls._create_audio_annotations(label)
79+
yield from cls._create_temporal_annotations(label)
8080

8181
@staticmethod
8282
def _get_consecutive_frames(
@@ -168,7 +168,7 @@ def _create_video_annotations(
168168
yield NDObject.from_common(segments, label.data)
169169

170170
@classmethod
171-
def _create_audio_annotations(
171+
def _create_temporal_annotations(
172172
cls, label: Label
173173
) -> Generator[BaseModel, None, None]:
174174
"""Create temporal annotations with nested classifications using new temporal classes."""

libs/labelbox/src/labelbox/data/serialization/ndjson/temporal.py

Lines changed: 146 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,14 @@ def _process_text_group(
8989
9090
Each annotation can have multiple (start, end, text) tuples.
9191
Groups by text value and merges frames.
92+
93+
Nested classifications are assigned to text values based on frame overlap.
9294
"""
9395
# Collect all text values with their frames
94-
text_data = defaultdict(lambda: {"frames": [], "nested": []})
96+
text_data = defaultdict(lambda: {"frames": []})
97+
98+
# Collect all nested classifications from all annotations
99+
all_nested_classifications = []
95100

96101
for ann in annotations:
97102
for start, end, text_value in ann.value:
@@ -104,9 +109,12 @@ def _process_text_group(
104109

105110
text_data[text_value]["frames"].append({"start": start, "end": end})
106111

107-
# Collect nested classifications
108-
if ann.classifications:
109-
text_data[text_value]["nested"].extend(ann.classifications)
112+
# Collect nested classifications at annotation level (not per text value)
113+
if ann.classifications:
114+
all_nested_classifications.extend(ann.classifications)
115+
116+
# Track which nested classifications were assigned
117+
assigned_nested = set()
110118

111119
# Build results
112120
results = []
@@ -119,15 +127,40 @@ def _process_text_group(
119127
"frames": unique_frames,
120128
}
121129

122-
# Process nested classifications recursively
123-
if data["nested"]:
130+
# Assign nested classifications based on frame overlap
131+
if all_nested_classifications:
124132
parent_frame_tuples = [(f["start"], f["end"]) for f in unique_frames]
125-
nested = _process_nested_classifications(data["nested"], parent_frame_tuples)
126-
if nested:
127-
entry["classifications"] = nested
133+
# Filter nested classifications that overlap with this text value's frames
134+
relevant_nested = _filter_classifications_by_overlap(
135+
all_nested_classifications, parent_frame_tuples
136+
)
137+
if relevant_nested:
138+
# Track that these were assigned
139+
for cls in relevant_nested:
140+
assigned_nested.add(id(cls))
141+
142+
# Pass ONLY THIS text value's frames so nested answers are filtered correctly
143+
nested = _process_nested_classifications(relevant_nested, parent_frame_tuples)
144+
if nested:
145+
entry["classifications"] = nested
128146

129147
results.append(entry)
130148

149+
# Log orphaned nested classifications (not assigned to any parent)
150+
if all_nested_classifications:
151+
for cls in all_nested_classifications:
152+
if id(cls) not in assigned_nested:
153+
if isinstance(cls, TemporalClassificationText):
154+
frames_info = cls.value[0][:2] if cls.value else "no frames"
155+
elif isinstance(cls, TemporalClassificationQuestion):
156+
frames_info = cls.value[0].frames if cls.value and cls.value[0].frames else "no frames"
157+
else:
158+
frames_info = "unknown"
159+
logger.warning(
160+
f"Orphaned nested classification '{cls.name}' with frames {frames_info} - "
161+
f"no parent text value found with overlapping frames."
162+
)
163+
131164
return results
132165

133166

@@ -140,28 +173,43 @@ def _process_question_group(
140173
141174
Each annotation has a list of TemporalClassificationAnswer objects.
142175
Groups by answer name and merges frames.
176+
177+
Nested classifications are assigned to answers based on frame overlap.
143178
"""
144-
# Collect all answers
145-
answer_data = defaultdict(lambda: {"frames": [], "nested": []})
179+
# Collect all answers with their frames
180+
answer_data = defaultdict(lambda: {"frames": []})
181+
182+
# Collect all nested classifications from all answers
183+
all_nested_by_answer = defaultdict(list)
146184

147185
for ann in annotations:
148186
for answer in ann.value: # value contains list of answers
149187
# Validate and collect frames
150188
valid_frames = []
151189
for start, end in answer.frames:
152-
if parent_frames and not _is_frame_subset([(start, end)], parent_frames):
153-
logger.warning(
154-
f"Answer '{answer.name}' frames ({start}, {end}) not subset of parent frames {parent_frames}. Discarding."
155-
)
156-
continue
190+
# If parent_frames provided, check if answer frames are subset of ANY parent frame
191+
# A child frame is a subset if: parent_start <= child_start AND child_end <= parent_end
192+
if parent_frames:
193+
is_valid = False
194+
for parent_start, parent_end in parent_frames:
195+
if parent_start <= start and end <= parent_end:
196+
is_valid = True
197+
break
198+
if not is_valid:
199+
# Don't log here - this is expected when processing inductive structures
200+
# Only log orphaned classifications that are never assigned to any parent
201+
continue
157202
valid_frames.append({"start": start, "end": end})
158203

159204
if valid_frames: # Only add if we have valid frames
160205
answer_data[answer.name]["frames"].extend(valid_frames)
161206

162-
# Collect nested classifications
207+
# Collect nested classifications at answer level
163208
if answer.classifications:
164-
answer_data[answer.name]["nested"].extend(answer.classifications)
209+
all_nested_by_answer[answer.name].extend(answer.classifications)
210+
211+
# Track which nested classifications were assigned
212+
assigned_nested = set()
165213

166214
# Build results
167215
results = []
@@ -177,15 +225,39 @@ def _process_question_group(
177225
"frames": unique_frames,
178226
}
179227

180-
# Process nested classifications recursively
181-
if data["nested"]:
228+
# Assign nested classifications based on frame overlap
229+
if all_nested_by_answer[answer_name]:
182230
parent_frame_tuples = [(f["start"], f["end"]) for f in unique_frames]
183-
nested = _process_nested_classifications(data["nested"], parent_frame_tuples)
184-
if nested:
185-
entry["classifications"] = nested
231+
# Filter nested classifications that overlap with this answer's frames
232+
relevant_nested = _filter_classifications_by_overlap(
233+
all_nested_by_answer[answer_name], parent_frame_tuples
234+
)
235+
if relevant_nested:
236+
# Track that these were assigned
237+
for cls in relevant_nested:
238+
assigned_nested.add(id(cls))
239+
240+
nested = _process_nested_classifications(relevant_nested, parent_frame_tuples)
241+
if nested:
242+
entry["classifications"] = nested
186243

187244
results.append(entry)
188245

246+
# Log orphaned nested classifications (not assigned to any answer)
247+
for answer_name, nested_list in all_nested_by_answer.items():
248+
for cls in nested_list:
249+
if id(cls) not in assigned_nested:
250+
if isinstance(cls, TemporalClassificationText):
251+
frames_info = cls.value[0][:2] if cls.value else "no frames"
252+
elif isinstance(cls, TemporalClassificationQuestion):
253+
frames_info = cls.value[0].frames if cls.value and cls.value[0].frames else "no frames"
254+
else:
255+
frames_info = "unknown"
256+
logger.warning(
257+
f"Orphaned nested classification '{cls.name}' in answer '{answer_name}' with frames {frames_info} - "
258+
f"no overlapping frames found with parent answer."
259+
)
260+
189261
return results
190262

191263

@@ -229,6 +301,57 @@ def _process_nested_classifications(
229301
return results
230302

231303

304+
def _filter_classifications_by_overlap(
    classifications: List[Union[TemporalClassificationText, TemporalClassificationQuestion]],
    parent_frames: List[Tuple[int, int]],
) -> List[Union[TemporalClassificationText, TemporalClassificationQuestion]]:
    """
    Keep only the classifications that share at least one frame with the parent.

    A classification is kept when ANY of its own frame ranges overlaps ANY
    range in ``parent_frames``. Input order is preserved. Items that are
    neither text nor question classifications are never kept.
    """

    def _touches_parent(item) -> bool:
        # Text classifications carry (start, end, text) tuples; only the
        # frame range matters for the overlap decision.
        if isinstance(item, TemporalClassificationText):
            return any(
                _frames_overlap([(start, end)], parent_frames)
                for start, end, _ in item.value
            )
        # Question classifications carry answers, each with its own list of
        # frame ranges; one overlapping answer is enough.
        if isinstance(item, TemporalClassificationQuestion):
            return any(
                _frames_overlap(answer.frames, parent_frames)
                for answer in item.value
            )
        return False

    return [item for item in classifications if _touches_parent(item)]
336+
337+
338+
def _frames_overlap(
339+
frames1: List[Tuple[int, int]],
340+
frames2: List[Tuple[int, int]],
341+
) -> bool:
342+
"""
343+
Check if any frame in frames1 overlaps with any frame in frames2.
344+
345+
Two frames (s1, e1) and (s2, e2) overlap if:
346+
max(s1, s2) <= min(e1, e2)
347+
"""
348+
for start1, end1 in frames1:
349+
for start2, end2 in frames2:
350+
if max(start1, start2) <= min(end1, end2):
351+
return True
352+
return False
353+
354+
232355
def _is_frame_subset(
233356
child_frames: List[Tuple[int, int]],
234357
parent_frames: List[Tuple[int, int]],

0 commit comments

Comments
 (0)