Skip to content

Commit dff24d7

Browse files
authored
Merge pull request #643 from Labelbox/ms/coco-handle-empty-annotation
handle empty annotations
2 parents 0933c82 + 8f1325d commit dff24d7

File tree

4 files changed

+51
-24
lines changed

4 files changed

+51
-24
lines changed

labelbox/data/annotation_types/data/raster.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,16 @@
22
from io import BytesIO
33
from typing import Callable, Optional, Union
44
from typing_extensions import Literal
5-
import numpy as np
6-
import requests
5+
76
from PIL import Image
87
from google.api_core import retry
98
from pydantic import BaseModel
109
from pydantic import root_validator
10+
from requests.exceptions import ConnectTimeout
11+
import requests
12+
import numpy as np
1113

14+
from labelbox.exceptions import InternalServerError
1215
from .base_data import BaseData
1316
from ..types import TypedArray
1417

@@ -113,7 +116,9 @@ def value(self) -> np.ndarray:
113116
def set_fetch_fn(self, fn):
    """
    Replace this instance's ``fetch_remote`` with a custom fetcher.

    Args:
        fn: Callable that takes this data object as its only argument.
            After this call, ``self.fetch_remote()`` returns ``fn(self)``.
    """

    def _fetch_with_self():
        return fn(self)

    # object.__setattr__ bypasses any __setattr__ defined by the class,
    # so the override is stored directly on the instance.
    object.__setattr__(self, 'fetch_remote', _fetch_with_self)
115118

116-
@retry.Retry(deadline=15.,
             predicate=retry.if_exception_type(ConnectTimeout,
                                               InternalServerError))
def fetch_remote(self) -> bytes:
    """
    Method for accessing url.

    Issues ``requests.get(self.url)`` and returns the raw response body.
    The call is retried (deadline of 15 seconds) only when it raises
    ``ConnectTimeout`` or ``InternalServerError``; any other exception
    propagates immediately.

    If the url needs a different access pattern, simply override this
    function (or install a replacement with ``set_fetch_fn``).

    Returns:
        The response content as bytes.

    Raises:
        InternalServerError: the server answered 500, 502, 503 or 504.
        requests.HTTPError: any other error status, via
            ``raise_for_status``.
    """
    # NOTE(review): no explicit timeout is passed to requests.get —
    # confirm that is intended.
    response = requests.get(self.url)
    server_side_failure = response.status_code in [500, 502, 503, 504]
    if server_side_failure:
        # Raised as InternalServerError so the retry predicate matches.
        raise InternalServerError(response.text)
    response.raise_for_status()
    return response.content
127134

labelbox/data/annotation_types/data/text.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
from typing import Callable, Optional
22

33
import requests
4+
from requests.exceptions import ConnectTimeout
45
from google.api_core import retry
56
from pydantic import root_validator
67

8+
from labelbox.exceptions import InternalServerError
79
from .base_data import BaseData
810

911

@@ -47,7 +49,9 @@ def value(self) -> str:
4749
def set_fetch_fn(self, fn):
    """
    Replace this instance's ``fetch_remote`` with a custom fetcher.

    Args:
        fn: Callable that takes this data object as its only argument.
            After this call, ``self.fetch_remote()`` returns ``fn(self)``.
    """

    def _fetch_with_self():
        return fn(self)

    # object.__setattr__ bypasses any __setattr__ defined by the class,
    # so the override is stored directly on the instance.
    object.__setattr__(self, 'fetch_remote', _fetch_with_self)
4951

50-
@retry.Retry(deadline=15.,
             predicate=retry.if_exception_type(ConnectTimeout,
                                               InternalServerError))
def fetch_remote(self) -> str:
    """
    Method for accessing url.

    Issues ``requests.get(self.url)`` and returns the decoded response
    text. The call is retried (deadline of 15 seconds) only when it raises
    ``ConnectTimeout`` or ``InternalServerError``; any other exception
    propagates immediately.

    If the url needs a different access pattern, simply override this
    function (or install a replacement with ``set_fetch_fn``).

    Returns:
        The response body as text.

    Raises:
        InternalServerError: the server answered 500, 502, 503 or 504.
        requests.HTTPError: any other error status, via
            ``raise_for_status``.
    """
    response = requests.get(self.url)
    if response.status_code in [500, 502, 503, 504]:
        # Must use the imported name: this module never binds `labelbox`,
        # so `labelbox.exceptions.InternalServerError` would raise
        # NameError, which the retry predicate does not match.
        raise InternalServerError(response.text)
    response.raise_for_status()
    return response.text
6167

labelbox/data/serialization/coco/categories.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import sys
2+
from hashlib import md5
23

34
from pydantic import BaseModel
45

@@ -11,4 +12,5 @@ class Categories(BaseModel):
1112

1213

1314
def hash_category_name(name: str) -> int:
    """
    Derive a stable, non-negative integer id from a category name.

    The id is the MD5 hex digest of the UTF-8 encoded name, with the
    digest's ASCII bytes read as a little-endian integer. Unlike the
    builtin ``hash`` on ``str``, which is salted per interpreter process,
    this gives the same id for the same name on every run.

    Args:
        name: Category name to hash.

    Returns:
        The integer id for ``name``.
    """
    hex_digest = md5(name.encode('utf-8')).hexdigest()
    # NOTE(review): the hex digest is 32 ASCII bytes, so the result can be
    # up to 256 bits wide — confirm consumers accept ids that large.
    return int.from_bytes(hex_digest.encode('utf-8'), 'little')

labelbox/data/serialization/coco/instance_dataset.py

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# https://cocodataset.org/#format-data
22

33
from concurrent.futures import ProcessPoolExecutor, as_completed
4-
from typing import Any, Dict, List, Tuple
4+
from typing import Any, Dict, List, Tuple, Optional
55
from pathlib import Path
66

77
import numpy as np
@@ -15,14 +15,16 @@
1515
from .image import CocoImage, get_image, get_image_id
1616

1717

18-
def mask_to_coco_object_annotation(annotation: ObjectAnnotation, annot_idx: int,
19-
image_id: int,
20-
category_id: int) -> COCOObjectAnnotation:
18+
def mask_to_coco_object_annotation(
19+
annotation: ObjectAnnotation, annot_idx: int, image_id: int,
20+
category_id: int) -> Optional[COCOObjectAnnotation]:
2121
# This is going to fill any holes into the multipolygon
2222
# If you need to support holes use the panoptic data format
2323
shapely = annotation.value.shapely.simplify(1).buffer(0)
24+
2425
if shapely.is_empty:
25-
shapely = annotation.value.shapely.simplify(1).buffer(0.01)
26+
return
27+
2628
xmin, ymin, xmax, ymax = shapely.bounds
2729
# Iterate over polygon once or multiple polygon for each item
2830
area = shapely.area
@@ -89,6 +91,19 @@ def segmentations_to_common(class_annotations: COCOObjectAnnotation,
8991
return annotations
9092

9193

94+
def object_annotation_to_coco(
        annotation: ObjectAnnotation, annot_idx: int, image_id: int,
        category_id: int) -> Optional[COCOObjectAnnotation]:
    """
    Convert one object annotation to a COCO object annotation.

    Dispatches on the type of ``annotation.value``: ``Mask`` values go to
    ``mask_to_coco_object_annotation``, ``Polygon`` and ``Rectangle``
    values go to ``vector_to_coco_object_annotation``.

    Args:
        annotation: Annotation to convert.
        annot_idx: Index forwarded to the converter.
        image_id: Image id forwarded to the converter.
        category_id: Category id forwarded to the converter.

    Returns:
        Whatever the selected converter returns, or ``None`` when the
        value is not a ``Mask``, ``Polygon`` or ``Rectangle``.
    """
    if isinstance(annotation.value, Mask):
        converter = mask_to_coco_object_annotation
    elif isinstance(annotation.value, (Polygon, Rectangle)):
        converter = vector_to_coco_object_annotation
    else:
        # Unsupported geometry: nothing to export for this annotation.
        return None
    return converter(annotation, annot_idx, image_id, category_id)
105+
106+
92107
def process_label(
93108
label: Label,
94109
idx: int,
@@ -103,20 +118,16 @@ def process_label(
103118
categories = {}
104119
for class_name in annotation_lookup:
105120
for annotation in annotation_lookup[class_name]:
106-
if annotation.name not in categories:
107-
categories[annotation.name] = hash_category_name(
108-
annotation.name)
109-
if isinstance(annotation.value, Mask):
110-
coco_annotations.append(
111-
mask_to_coco_object_annotation(annotation, annot_idx,
112-
image_id,
113-
categories[annotation.name]))
114-
elif isinstance(annotation.value, (Polygon, Rectangle)):
115-
coco_annotations.append(
116-
vector_to_coco_object_annotation(
117-
annotation, annot_idx, image_id,
118-
categories[annotation.name]))
119-
annot_idx += 1
121+
category_id = categories.get(annotation.name) or hash_category_name(
122+
annotation.name)
123+
coco_annotation = object_annotation_to_coco(annotation, annot_idx,
124+
image_id, category_id)
125+
if coco_annotation is not None:
126+
coco_annotations.append(coco_annotation)
127+
if annotation.name not in categories:
128+
categories[annotation.name] = category_id
129+
annot_idx += 1
130+
120131
return image, coco_annotations, categories
121132

122133

@@ -147,6 +158,7 @@ def from_common(cls,
147158
future.result() for future in tqdm(as_completed(futures))
148159
]
149160
else:
161+
150162
results = [
151163
process_label(label, idx, image_root)
152164
for idx, label in enumerate(labels)

0 commit comments

Comments
 (0)