Skip to content

Commit 1a53cbd

Browse files
authored
Merge pull request #4 from Labelbox/ENG-495
[ENG-495] Expose public API for use in k8s exporters
2 parents e5f7dc2 + 902d1a7 commit 1a53cbd

File tree

4 files changed

+119
-67
lines changed

4 files changed

+119
-67
lines changed

labelbox/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"The Labelbox python package."
22

3-
__version__ = '0.0.3'
3+
__version__ = '0.0.4'

labelbox/exporters/coco_exporter.py

Lines changed: 73 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22
Module for converting labelbox.com JSON exports to MS COCO format.
33
"""
44

5-
import json
65
import datetime as dt
6+
import json
77
import logging
8+
from typing import Any, Dict
9+
10+
from PIL import Image
11+
import requests
812
from shapely import wkt
913
from shapely.geometry import Polygon
10-
import requests
11-
from PIL import Image
1214

1315
from labelbox.exceptions import UnknownFormatError
1416

@@ -25,15 +27,7 @@ def from_json(labeled_data, coco_output, label_format='WKT'):
2527
for data in label_data:
2628
# Download and get image name
2729
try:
28-
image = {
29-
"id": data['ID'],
30-
"file_name": data['Labeled Data'],
31-
"license": None,
32-
"flickr_url": data['Labeled Data'],
33-
"coco_url": data['Labeled Data'],
34-
"date_captured": None,
35-
}
36-
_add_label(coco, image, data['Label'], label_format)
30+
add_label(coco, data['ID'], data['Labeled Data'], data['Label'], label_format)
3731
except requests.exceptions.MissingSchema as exc:
3832
logging.exception(exc)
3933
continue
@@ -45,31 +39,57 @@ def from_json(labeled_data, coco_output, label_format='WKT'):
4539
file_handle.write(json.dumps(coco))
4640

4741

48-
def make_coco_metadata(project_name, created_by):
49-
"Initializes COCO export data structure."
50-
coco = {
51-
'info': None,
42+
def make_coco_metadata(project_name: str, created_by: str) -> Dict[str, Any]:
43+
"""Initializes COCO export data structure.
44+
45+
Args:
46+
project_name: name of the project
47+
created_by: email of the project creator
48+
49+
Returns:
50+
The COCO export represented as a dictionary.
51+
"""
52+
return {
53+
'info': {
54+
'year': dt.datetime.now(dt.timezone.utc).year,
55+
'version': None,
56+
'description': project_name,
57+
'contributor': created_by,
58+
'url': 'labelbox.com',
59+
'date_created': dt.datetime.now(dt.timezone.utc).isoformat()
60+
},
5261
'images': [],
5362
'annotations': [],
5463
'licenses': [],
5564
'categories': []
5665
}
5766

58-
coco['info'] = {
59-
'year': dt.datetime.now(dt.timezone.utc).year,
60-
'version': None,
61-
'description': project_name,
62-
'contributor': created_by,
63-
'url': 'labelbox.com',
64-
'date_created': dt.datetime.now(dt.timezone.utc).isoformat()
65-
}
66-
67-
return coco
6867

69-
70-
def _add_label(coco, image, labels, label_format):
71-
"Incrementally updates COCO export data structure with a new label."
72-
response = requests.get(image['coco_url'], stream=True)
68+
def add_label(
69+
coco: Dict[str, Any], label_id: str, image_url: str,
70+
labels: Dict[str, Any], label_format: str):
71+
"""Incrementally updates COCO export data structure with a new label.
72+
73+
Args:
74+
coco: The current COCO export, will be incrementally updated by this method.
75+
label_id: ID for the instance to write
76+
image_url: URL to download image file from
77+
labels: Labelbox formatted labels to use for generating annotation
78+
label_format: Format of the labeled data. Valid options are: "WKT" and
79+
"XY".
80+
81+
Returns:
82+
None. The `coco` dictionary passed in is updated in place.
83+
"""
84+
image = {
85+
"id": label_id,
86+
"file_name": image_url,
87+
"license": None,
88+
"flickr_url": image_url,
89+
"coco_url": image_url,
90+
"date_captured": None,
91+
}
92+
response = requests.get(image_url, stream=True)
7393
response.raw.decode_content = True
7494
image['width'], image['height'] = Image.open(response.raw).size
7595

@@ -96,25 +116,29 @@ def _add_label(coco, image, labels, label_format):
96116
coco['categories'].append(category)
97117

98118
polygons = _get_polygons(label_format, label_data)
99-
100-
for polygon in polygons:
101-
segmentation = []
102-
for x_val, y_val in polygon.exterior.coords:
103-
segmentation.extend([x_val, image['height'] - y_val])
104-
105-
annotation = {
106-
"id": len(coco['annotations']) + 1,
107-
"image_id": image['id'],
108-
"category_id": category_id,
109-
"segmentation": [segmentation],
110-
"area": polygon.area, # float
111-
"bbox": [polygon.bounds[0], polygon.bounds[1],
112-
polygon.bounds[2] - polygon.bounds[0],
113-
polygon.bounds[3] - polygon.bounds[1]],
114-
"iscrowd": 0
115-
}
116-
117-
coco['annotations'].append(annotation)
119+
_append_polygons_as_annotations(coco, image, category_id, polygons)
120+
121+
122+
def _append_polygons_as_annotations(coco, image, category_id, polygons):
123+
"Adds `polygons` as annotations in the `coco` export"
124+
for polygon in polygons:
125+
segmentation = []
126+
for x_val, y_val in polygon.exterior.coords:
127+
segmentation.extend([x_val, image['height'] - y_val])
128+
129+
annotation = {
130+
"id": len(coco['annotations']) + 1,
131+
"image_id": image['id'],
132+
"category_id": category_id,
133+
"segmentation": [segmentation],
134+
"area": polygon.area, # float
135+
"bbox": [polygon.bounds[0], polygon.bounds[1],
136+
polygon.bounds[2] - polygon.bounds[0],
137+
polygon.bounds[3] - polygon.bounds[1]],
138+
"iscrowd": 0
139+
}
140+
141+
coco['annotations'].append(annotation)
118142

119143

120144
def _get_polygons(label_format, label_data):

labelbox/exporters/voc_exporter.py

Lines changed: 43 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@
22
Module for converting labelbox.com JSON exports to Pascal VOC 2012 format.
33
"""
44

5-
import os
65
import json
76
import logging
8-
from shapely import wkt
9-
import requests
7+
import os
8+
from typing import Any, Dict
9+
1010
from PIL import Image
11+
import requests
12+
from shapely import wkt
1113

1214
from labelbox.exceptions import UnknownFormatError
1315
from labelbox.exporters.pascal_voc_writer import Writer as PascalWriter
@@ -44,7 +46,13 @@ def from_json(labeled_data, annotations_output_dir, images_output_dir,
4446

4547
for data in label_data:
4648
try:
47-
_write_label(data, label_format, images_output_dir, annotations_output_dir)
49+
write_label(
50+
data['ID'],
51+
data['Labeled Data'],
52+
data['Label'],
53+
label_format,
54+
images_output_dir,
55+
annotations_output_dir)
4856

4957
except requests.exceptions.MissingSchema as exc:
5058
logging.exception(exc)
@@ -54,43 +62,56 @@ def from_json(labeled_data, annotations_output_dir, images_output_dir,
5462
continue
5563

5664

57-
def _write_label(
58-
data, label_format, images_output_dir, annotations_output_dir):
59-
"Writes a Pascal VOC formatted image and label pair to disk."
65+
def write_label( # pylint: disable-msg=too-many-arguments
66+
label_id: str, image_url: str, labels: Dict[str, Any], label_format: str,
67+
images_output_dir: str, annotations_output_dir: str):
68+
"""Writes a single Pascal VOC formatted image and label pair to disk.
69+
70+
Args:
71+
label_id: ID for the instance to write
72+
image_url: URL to download image file from
73+
labels: Labelbox formatted labels to use for generating annotation
74+
label_format: Format of the labeled data. Valid options are: "WKT" and
75+
"XY".
76+
annotations_output_dir: File path of directory to write Pascal VOC
77+
annotation files.
78+
images_output_dir: File path of directory to write images.
79+
"""
6080
# Download image and save it
61-
response = requests.get(data['Labeled Data'], stream=True)
81+
response = requests.get(image_url, stream=True)
6282
response.raw.decode_content = True
6383
image = Image.open(response.raw)
64-
image_name = ('{img_id}.{ext}'.format(img_id=data['ID'], ext=image.format.lower()))
65-
image_fqn = os.path.join(images_output_dir, image_name)
84+
image_fqn = os.path.join(
85+
images_output_dir,
86+
'{img_id}.{ext}'.format(img_id=label_id, ext=image.format.lower()))
6687
image.save(image_fqn, format=image.format)
6788

6889
# generate image annotation in Pascal VOC
6990
width, height = image.size
7091
xml_writer = PascalWriter(image_fqn, width, height)
7192

7293
# remove classification labels (Skip, etc...)
73-
if not callable(getattr(data['Label'], 'keys', None)):
94+
if not callable(getattr(labels, 'keys', None)):
7495
# skip if no categories (e.g. "Skip")
7596
return
7697

7798
# convert label to Pascal VOC format
78-
for category_name, wkt_data in data['Label'].items():
99+
for category_name, paths in labels.items():
79100
if label_format == 'WKT':
80101
xml_writer = _add_pascal_object_from_wkt(
81-
xml_writer, img_height=height, wkt_data=wkt_data,
102+
xml_writer, img_height=height, wkt_data=paths,
82103
label=category_name)
83104
elif label_format == 'XY':
84105
xml_writer = _add_pascal_object_from_xy(
85-
xml_writer, img_height=height, polygons=wkt_data,
106+
xml_writer, img_height=height, polygons=paths,
86107
label=category_name)
87108
else:
88109
exc = UnknownFormatError(label_format=label_format)
89110
logging.exception(exc.message)
90111
raise exc
91112

92113
# write Pascal VOC xml annotation for image
93-
xml_writer.save(os.path.join(annotations_output_dir, '{}.xml'.format(data['ID'])))
114+
xml_writer.save(os.path.join(annotations_output_dir, '{}.xml'.format(label_id)))
94115

95116

96117
def _add_pascal_object_from_wkt(xml_writer, img_height, wkt_data, label):
@@ -112,10 +133,16 @@ def _add_pascal_object_from_wkt(xml_writer, img_height, wkt_data, label):
112133

113134

114135
def _add_pascal_object_from_xy(xml_writer, img_height, polygons, label):
136+
if not isinstance(polygons, list):
137+
# polygons is not [{'geometry': [xy]}] nor [[xy]]
138+
return xml_writer
115139
for polygon in polygons:
116140
if 'geometry' in polygon: # V3
117141
polygon = polygon['geometry']
118-
assert isinstance(polygon, list) # V2 and V3
142+
if not isinstance(polygon, list) \
143+
or not all(map(lambda p: 'x' in p and 'y' in p, polygon)):
144+
# couldn't make a list of points, give up
145+
return xml_writer
119146

120147
xy_coords = []
121148
for point in polygon:

setup.cfg

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ classifiers =
2525

2626
[options]
2727
zip_safe = False
28-
packages = find_namespace:
28+
packages =
29+
labelbox
2930
include_package_data = True
3031
install_requires =
3132
jinja2

0 commit comments

Comments
 (0)