
Commit 70484dd

Merge pull request #720 from Labelbox/ms/remove-subclass-from-metrics
remove subclass from metric calculations
2 parents 4f6d873 + 686a0d2

9 files changed: +118 -83 lines changed

CHANGELOG.md

Lines changed: 9 additions & 2 deletions
@@ -1,5 +1,12 @@
 # Changelog
 
+# In progress
+## Changed
+* Default behavior for metrics to not include subclasses in the calculation.
+
+## Fixed
+* Polygon extraction from masks creating invalid polygons. This would cause issues in the coco converter.
+
 # Version 3.28.0 (2022-10-14)
 
 ### Added
@@ -45,7 +52,7 @@
 * Increase scalar metric value limit to 100m
 * Added deprecation warnings when updating project `queue_mode`
 ### Fixed
-* Fix bug in `feature_confusion_matrix` and `confusion_matrix` causing FPs and FNs to be capped at 1 when there were no matching annotations
+* Fix bug in `feature_confusion_matrix` and `confusion_matrix` causing FPs and FNs to be capped at 1 when there were no matching annotations
 
 # Version 3.26.2 (2022-09-06)
 ### Added
@@ -65,7 +72,7 @@
 * Resets model run training metadata
 * `ModelRun.get_config()`
 * Fetches model run training metadata
-
+
 ### Changed
 * `Model.create_model_run()`
 * Add training metadata config as a model run creation param
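Note on the "Changed" entry above: the metric entry points modified below keep the `include_subclasses` keyword, so callers who relied on the old default can opt back in explicitly. A minimal sketch, assuming the usual annotation types from `labelbox.data.annotation_types`; the box and subclass values are illustrative, not taken from this commit:

```python
# Sketch: keeping the pre-change behavior after upgrading. Import paths mirror
# the files touched in this commit; the sample annotations are hypothetical.
from labelbox.data.annotation_types import (ClassificationAnnotation,
                                            ClassificationAnswer,
                                            ObjectAnnotation, Point, Radio,
                                            Rectangle)
from labelbox.data.metrics.confusion_matrix.confusion_matrix import (
    feature_confusion_matrix_metric)

box = Rectangle(start=Point(x=0, y=0), end=Point(x=10, y=10))
subclass = ClassificationAnnotation(
    name="is_animal", value=Radio(answer=ClassificationAnswer(name="yes")))

ground_truths = [
    ObjectAnnotation(name="cat", value=box, classifications=[subclass])
]
predictions = [ObjectAnnotation(name="cat", value=box)]  # subclass omitted

# New default: subclasses are ignored, so the matching boxes count as a TP.
print(feature_confusion_matrix_metric(ground_truths, predictions))

# Old behavior: opt back in and the subclass mismatch is penalized.
print(feature_confusion_matrix_metric(ground_truths,
                                      predictions,
                                      include_subclasses=True))
```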

labelbox/data/metrics/confusion_matrix/calculation.py

Lines changed: 4 additions & 23 deletions
@@ -268,29 +268,10 @@ def mask_confusion_matrix(ground_truths: List[ObjectAnnotation],
     elif has_no_annotations(ground_truths, predictions):
         return None
 
-    if include_subclasses:
-        # This results in a faily drastically different value than without subclasses.
-        # If we have subclasses set to True, then this is object detection with masks
-        # Otherwise this will compute metrics on each pixel.
-        pairs = _get_mask_pairs(ground_truths, predictions)
-        return object_pair_confusion_matrix(
-            pairs, include_subclasses=include_subclasses, iou=iou)
-
-    prediction_np = np.max([pred.value.draw(color=1) for pred in predictions],
-                           axis=0)
-    ground_truth_np = np.max(
-        [ground_truth.value.draw(color=1) for ground_truth in ground_truths],
-        axis=0)
-    if prediction_np.shape != ground_truth_np.shape:
-        raise ValueError(
-            "Prediction and mask must have the same shape."
-            f" Found {prediction_np.shape}/{ground_truth_np.shape}.")
-
-    tp_mask = prediction_np == ground_truth_np == 1
-    fp_mask = (prediction_np == 1) & (ground_truth_np == 0)
-    fn_mask = (prediction_np == 0) & (ground_truth_np == 1)
-    tn_mask = prediction_np == ground_truth_np == 0
-    return [np.sum(tp_mask), np.sum(fp_mask), np.sum(fn_mask), np.sum(tn_mask)]
+    pairs = _get_mask_pairs(ground_truths, predictions)
+    return object_pair_confusion_matrix(pairs,
+                                        include_subclasses=include_subclasses,
+                                        iou=iou)
 
 
 def ner_confusion_matrix(ground_truths: List[ObjectAnnotation],
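The net effect of this hunk: `mask_confusion_matrix` no longer has a pixel-level branch; with or without subclasses it now pairs masks as whole objects via `_get_mask_pairs` and delegates to `object_pair_confusion_matrix`. For reference, a standalone NumPy sketch of the per-pixel counting the removed branch performed; the two arrays stand in for the drawn prediction and ground-truth masks:

```python
import numpy as np

# Illustrative binary masks; in the removed code these came from
# annotation.value.draw(color=1) reduced with np.max over each list.
prediction_np = np.array([[1, 1], [0, 0]])
ground_truth_np = np.array([[1, 0], [0, 1]])

tp = int(np.sum((prediction_np == 1) & (ground_truth_np == 1)))
fp = int(np.sum((prediction_np == 1) & (ground_truth_np == 0)))
fn = int(np.sum((prediction_np == 0) & (ground_truth_np == 1)))
tn = int(np.sum((prediction_np == 0) & (ground_truth_np == 0)))
print([tp, fp, fn, tn])  # [1, 1, 1, 1]
```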

labelbox/data/metrics/confusion_matrix/confusion_matrix.py

Lines changed: 2 additions & 2 deletions
@@ -16,7 +16,7 @@ def confusion_matrix_metric(ground_truths: List[Union[
     ObjectAnnotation, ClassificationAnnotation]],
                             predictions: List[Union[ObjectAnnotation,
                                                     ClassificationAnnotation]],
-                            include_subclasses=True,
+                            include_subclasses=False,
                             iou=0.5) -> List[ConfusionMatrixMetric]:
     """
     Computes confusion matrix metrics between two sets of annotations.
@@ -47,7 +47,7 @@ def confusion_matrix_metric(ground_truths: List[Union[
 def feature_confusion_matrix_metric(
         ground_truths: List[Union[ObjectAnnotation, ClassificationAnnotation]],
         predictions: List[Union[ObjectAnnotation, ClassificationAnnotation]],
-        include_subclasses=True,
+        include_subclasses=False,
         iou: float = 0.5,
 ) -> List[ConfusionMatrixMetric]:
     """

labelbox/data/metrics/iou/iou.py

Lines changed: 2 additions & 2 deletions
@@ -13,7 +13,7 @@ def miou_metric(ground_truths: List[Union[ObjectAnnotation,
                                           ClassificationAnnotation]],
                 predictions: List[Union[ObjectAnnotation,
                                         ClassificationAnnotation]],
-                include_subclasses=True) -> List[ScalarMetric]:
+                include_subclasses=False) -> List[ScalarMetric]:
     """
     Computes miou between two sets of annotations.
     These annotations should relate to the same data (image/video).
@@ -68,7 +68,7 @@ def feature_miou_metric(ground_truths: List[Union[ObjectAnnotation,
 
 def data_row_miou(ground_truth: Label,
                   prediction: Label,
-                  include_subclasses=True) -> Optional[float]:
+                  include_subclasses=False) -> Optional[float]:
     """
 
     This function is no longer supported. Use miou() for raw values or miou_metric() for the metric
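The docstring shown in the second hunk marks `data_row_miou` as no longer supported; the tests in this commit call `miou_metric` on `label.annotations` directly, which suggests the migration below. This is a sketch with placeholder annotation lists, not a verbatim recipe from the SDK docs:

```python
from labelbox.data.metrics.iou.iou import miou_metric

ground_truth_annotations = []  # placeholder; normally label.annotations
prediction_annotations = []    # placeholder; normally prediction.annotations

# Roughly equivalent to the retired
# data_row_miou(label, prediction, include_subclasses=True):
metrics = miou_metric(ground_truth_annotations,
                      prediction_annotations,
                      include_subclasses=True)
value = metrics[0].value if metrics else None
print(value)
```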

tests/data/metrics/confusion_matrix/conftest.py

Lines changed: 25 additions & 9 deletions
@@ -11,10 +11,16 @@
 
 class NameSpace(SimpleNamespace):
 
-    def __init__(self, predictions, ground_truths, expected):
-        super(NameSpace, self).__init__(predictions=predictions,
-                                        ground_truths=ground_truths,
-                                        expected=expected)
+    def __init__(self,
+                 predictions,
+                 ground_truths,
+                 expected,
+                 expected_without_subclasses=None):
+        super(NameSpace, self).__init__(
+            predictions=predictions,
+            ground_truths=ground_truths,
+            expected=expected,
+            expected_without_subclasses=expected_without_subclasses or expected)
 
 
 def get_radio(name, answer_name):
@@ -109,7 +115,8 @@ def get_object_pairs(tool_fn, **kwargs):
                 **kwargs,
                 subclasses=[get_radio("is_animal", answer_name="yes")])
         ],
-                  expected={'cat': [1, 0, 0, 0]}),
+                  expected={'cat': [1, 0, 0, 0]},
+                  expected_without_subclasses={'cat': [1, 0, 0, 0]}),
         NameSpace(predictions=[
             tool_fn("cat",
                     **kwargs,
@@ -121,7 +128,8 @@ def get_object_pairs(tool_fn, **kwargs):
                 **kwargs,
                 subclasses=[get_radio("is_animal", answer_name="no")])
         ],
-                  expected={'cat': [0, 1, 0, 1]}),
+                  expected={'cat': [0, 1, 0, 1]},
+                  expected_without_subclasses={'cat': [1, 0, 0, 0]}),
         NameSpace(predictions=[
             tool_fn("cat",
                     **kwargs,
@@ -136,7 +144,8 @@ def get_object_pairs(tool_fn, **kwargs):
                 **kwargs,
                 subclasses=[get_radio("is_animal", answer_name="no")])
         ],
-                  expected={'cat': [1, 1, 0, 0]}),
+                  expected={'cat': [1, 1, 0, 0]},
+                  expected_without_subclasses={'cat': [1, 1, 0, 0]}),
         NameSpace(predictions=[
             tool_fn("cat",
                     **kwargs,
@@ -154,6 +163,10 @@ def get_object_pairs(tool_fn, **kwargs):
                   expected={
                       'cat': [0, 1, 0, 1],
                       'dog': [0, 1, 0, 0]
+                  },
+                  expected_without_subclasses={
+                      'cat': [1, 0, 0, 0],
+                      'dog': [0, 1, 0, 0]
                   }),
         NameSpace(
             predictions=[tool_fn("cat", **kwargs),
@@ -171,7 +184,10 @@ def get_object_pairs(tool_fn, **kwargs):
                   ground_truths=[tool_fn("cat", **kwargs),
                                  tool_fn("cat", **kwargs)],
                   expected={'cat': [1, 0, 0, 1]}),
-        NameSpace(predictions=[], ground_truths=[], expected=[]),
+        NameSpace(predictions=[],
+                  ground_truths=[],
+                  expected=[],
+                  expected_without_subclasses=[]),
         NameSpace(predictions=[],
                   ground_truths=[tool_fn("cat", **kwargs)],
                   expected={'cat': [0, 0, 0, 1]}),
@@ -183,7 +199,7 @@ def get_object_pairs(tool_fn, **kwargs):
                   expected={
                       'cat': [0, 1, 0, 0],
                       'dog': [0, 0, 0, 1]
-                  }),
+                  })
     ]
 
 
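The updated fixture defaults `expected_without_subclasses` to `expected`, so only the cases where the two modes diverge need to spell out both values. A self-contained sketch of that fallback:

```python
from types import SimpleNamespace


class NameSpace(SimpleNamespace):

    def __init__(self,
                 predictions,
                 ground_truths,
                 expected,
                 expected_without_subclasses=None):
        super().__init__(predictions=predictions,
                         ground_truths=ground_truths,
                         expected=expected,
                         expected_without_subclasses=(
                             expected_without_subclasses or expected))


ns = NameSpace(predictions=[], ground_truths=[], expected={'cat': [1, 0, 0, 0]})
print(ns.expected_without_subclasses)  # falls back to expected: {'cat': [1, 0, 0, 0]}
```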

tests/data/metrics/confusion_matrix/test_confusion_matrix_data_row.py

Lines changed: 18 additions & 11 deletions
@@ -14,18 +14,25 @@
 ])
 def test_overlapping_objects(tool_examples):
     for example in tool_examples:
-        score = confusion_matrix_metric(example.ground_truths,
-                                        example.predictions)
 
-        if len(example.expected) == 0:
-            assert len(score) == 0
-        else:
-            expected = [0, 0, 0, 0]
-            for expected_values in example.expected.values():
-                for idx in range(4):
-                    expected[idx] += expected_values[idx]
-            assert score[0].value == tuple(
-                expected), f"{example.predictions},{example.ground_truths}"
+        for include_subclasses, expected_attr_name in [[
+                True, 'expected'
+        ], [False, 'expected_without_subclasses']]:
+            score = confusion_matrix_metric(
+                example.ground_truths,
+                example.predictions,
+                include_subclasses=include_subclasses)
+
+            if len(getattr(example, expected_attr_name)) == 0:
+                assert len(score) == 0
+            else:
+                expected = [0, 0, 0, 0]
+                for expected_values in getattr(example,
+                                               expected_attr_name).values():
+                    for idx in range(4):
+                        expected[idx] += expected_values[idx]
+                assert score[0].value == tuple(
+                    expected), f"{example.predictions},{example.ground_truths}"
 
 
 @parametrize("tool_examples",

tests/data/metrics/confusion_matrix/test_confusion_matrix_feature.py

Lines changed: 15 additions & 8 deletions
@@ -14,14 +14,21 @@
 ])
 def test_overlapping_objects(tool_examples):
     for example in tool_examples:
-        metrics = feature_confusion_matrix_metric(example.ground_truths,
-                                                  example.predictions)
-
-        metrics = {r.feature_name: list(r.value) for r in metrics}
-        if len(example.expected) == 0:
-            assert len(metrics) == 0
-        else:
-            assert metrics == example.expected, f"{example.predictions},{example.ground_truths}"
+        for include_subclasses, expected_attr_name in [[
+                True, 'expected'
+        ], [False, 'expected_without_subclasses']]:
+            metrics = feature_confusion_matrix_metric(
+                example.ground_truths,
+                example.predictions,
+                include_subclasses=include_subclasses)
+
+            metrics = {r.feature_name: list(r.value) for r in metrics}
+            if len(getattr(example, expected_attr_name)) == 0:
+                assert len(metrics) == 0
+            else:
+                assert metrics == getattr(
+                    example, expected_attr_name
+                ), f"{example.predictions},{example.ground_truths}"
 
 
 @parametrize("tool_examples",

tests/data/metrics/iou/data_row/conftest.py

Lines changed: 19 additions & 16 deletions
@@ -12,24 +12,26 @@ def __init__(self,
                  predictions,
                  labels,
                  expected,
+                 expected_without_subclasses=None,
                  data_row_expected=None,
                  media_attributes=None,
                  metadata=None,
                  classifications=None):
-        super(NameSpace,
-              self).__init__(predictions=predictions,
-                             labels={
-                                 'DataRow ID': 'ckppihxc10005aeyjen11h7jh',
-                                 'Labeled Data': "https://.jpg",
-                                 'Media Attributes': media_attributes or {},
-                                 'DataRow Metadata': metadata or [],
-                                 'Label': {
-                                     'objects': labels,
-                                     'classifications': classifications or []
-                                 }
-                             },
-                             expected=expected,
-                             data_row_expected=data_row_expected)
+        super(NameSpace, self).__init__(
+            predictions=predictions,
+            labels={
+                'DataRow ID': 'ckppihxc10005aeyjen11h7jh',
+                'Labeled Data': "https://.jpg",
+                'Media Attributes': media_attributes or {},
+                'DataRow Metadata': metadata or [],
+                'Label': {
+                    'objects': labels,
+                    'classifications': classifications or []
+                }
+            },
+            expected=expected,
+            expected_without_subclasses=expected_without_subclasses or expected,
+            data_row_expected=data_row_expected)
 
 
 @pytest.fixture
@@ -645,7 +647,8 @@ def test_box_with_wrong_subclass():
                           'answer': 'not_test'
                       }]
                   }],
-                  expected=0.5)
+                  expected=0.5,
+                  expected_without_subclasses=1.0)
 
 
 @pytest.fixture
@@ -780,4 +783,4 @@ def partial_matching_ner():
                       "end": 5
                   }
               }],
-              expected=0.2857142857142857)
+              expected=0.2857142857142857)
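The `test_box_with_wrong_subclass` fixture above shows the gap between the two modes: a geometrically identical box whose radio subclass answer disagrees now expects 0.5 with subclasses included and 1.0 without. The worked numbers below assume the subclass-aware score averages geometric IoU with subclass agreement, which is consistent with these expectations but is an assumption about the calculation module, not something this diff shows:

```python
# Assumed averaging scheme behind the fixture's expected values.
geometric_iou = 1.0       # identical boxes
subclass_agreement = 0.0  # the radio answers disagree

with_subclasses = (geometric_iou + subclass_agreement) / 2
without_subclasses = geometric_iou
print(with_subclasses, without_subclasses)  # 0.5 1.0
```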

tests/data/metrics/iou/data_row/test_data_row_iou.py

Lines changed: 24 additions & 10 deletions
@@ -21,15 +21,29 @@ def check_iou(pair, mask=None):
             annotation.value.mask.arr = np.frombuffer(
                 base64.b64decode(annotation.value.mask.url.encode('utf-8')),
                 dtype=np.uint8).reshape((32, 32, 3))
-    assert math.isclose(data_row_miou(label, prediction), pair.expected)
-    assert math.isclose(
-        miou_metric(label.annotations, prediction.annotations)[0].value,
-        pair.expected)
-    feature_ious = feature_miou_metric(label.annotations,
-                                       prediction.annotations)
-    assert len(feature_ious
-              ) == 1  # The tests run here should only have one class present.
-    assert math.isclose(feature_ious[0].value, pair.expected)
+
+    for include_subclasses, expected_attr_name in [[
+            True, 'expected'
+    ], [False, 'expected_without_subclasses']]:
+        assert math.isclose(
+            data_row_miou(label,
+                          prediction,
+                          include_subclasses=include_subclasses),
+            getattr(pair, expected_attr_name))
+        assert math.isclose(
+            miou_metric(label.annotations,
+                        prediction.annotations,
+                        include_subclasses=include_subclasses)[0].value,
+            getattr(pair, expected_attr_name))
+        feature_ious = feature_miou_metric(
+            label.annotations,
+            prediction.annotations,
+            include_subclasses=include_subclasses)
+        assert len(
+            feature_ious
+        ) == 1  # The tests run here should only have one class present.
+        assert math.isclose(feature_ious[0].value,
+                            getattr(pair, expected_attr_name))
 
 
 def check_iou_checklist(pair, mask=None):
@@ -122,4 +136,4 @@ def test_others(pair):
     strings_to_fixtures(
         ["matching_ner", "no_matching_ner", "partial_matching_ner"]))
 def test_ner(pair):
-    check_iou(pair)
+    check_iou(pair)
