Skip to content

Commit f11ac19

Browse files
authored
Merge pull request #673 from Labelbox/jtso/al-3329
[AL-3329] Document Annotation Type for Export
2 parents 1dfd302 + 896f96b commit f11ac19

File tree

8 files changed

+193
-1
lines changed

8 files changed

+193
-1
lines changed

labelbox/data/serialization/labelbox_v1/label.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ class LBV1Label(BaseModel):
146146
skipped: Optional[bool] = Extra('Skipped')
147147
media_type: Optional[str] = Extra('media_type')
148148
data_split: Optional[str] = Extra('Data Split')
149+
global_key: Optional[str] = Extra('Global Key')
149150

150151
def to_common(self) -> Label:
151152
if isinstance(self.label, list):

labelbox/data/serialization/labelbox_v1/objects.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ class LBV1ObjectBase(LBV1Feature):
2222
instanceURI: Optional[str] = None
2323
classifications: List[Union[LBV1Text, LBV1Radio, LBV1Dropdown,
2424
LBV1Checklist]] = []
25+
page: Optional[int] = None
26+
unit: Optional[str] = None
2527

2628
def dict(self, *args, **kwargs) -> Dict[str, Any]:
2729
res = super().dict(*args, **kwargs)
@@ -262,7 +264,7 @@ def from_common(cls, text_entity: TextEntity,
262264
class LBV1Objects(BaseModel):
263265
objects: List[Union[LBV1Line, LBV1Point, LBV1Polygon, LBV1Rectangle,
264266
LBV1TextEntity, LBV1Mask, LBV1TIPoint, LBV1TILine,
265-
LBV1TIPolygon, LBV1TIRectangle]]
267+
LBV1TIPolygon, LBV1TIRectangle,]]
266268

267269
def to_common(self) -> List[ObjectAnnotation]:
268270
objects = [
@@ -285,6 +287,8 @@ def to_common(self) -> List[ObjectAnnotation]:
285287
'color': obj.color,
286288
'feature_id': obj.feature_id,
287289
'value': obj.value,
290+
'page': obj.page,
291+
'unit': obj.unit,
288292
}) for obj in self.objects
289293
]
290294
return objects
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
[{
2+
"ID": "cl6xnzi4a7ldn0729381g7104",
3+
"DataRow ID": "cl6xnv9h61fv0085yhtoq06ht",
4+
"Labeled Data": "https://storage.labelbox.com/ckcz6bubudyfi0855o1dt1g9s%2F4cef4e08-e13d-8a5e-fbbf-c7624babb490-Airbnb_%20Labelbox%20-%20Focus%20on%20Workforce%20-%20Labelbox%20Labeling%20Operations%20(1).pdf?Expires=1661971050348&KeyName=labelbox-assets-key-3&Signature=JK6ral5CXF7T9Q5LaQqKvJy5A2A",
5+
"Label": {
6+
"objects": [{
7+
"featureId": "cl6xnzjpq0dmr07yocs2vfot8",
8+
"schemaId": "cl6xnuwt95lqq07330tbb3mfd",
9+
"color": "#1CE6FF",
10+
"title": "boxy",
11+
"value": "boxy",
12+
"bbox": {
13+
"top": 144.68,
14+
"left": 107.84,
15+
"height": 441.6,
16+
"width": 9.48
17+
},
18+
"page": 0,
19+
"unit": "POINTS",
20+
"instanceURI": "https://api.labelbox.com/masks/feature/cl6xnzjpq0dmr07yocs2vfot8?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2NjOWZtbXc0aGNkMDczOHFpeWM2YW54Iiwib3JnYW5pemF0aW9uSWQiOiJja2N6NmJ1YnVkeWZpMDg1NW8xZHQxZzlzIiwiaWF0IjoxNjYwNzYxNDUwLCJleHAiOjE2NjMzNTM0NTB9.X4-j6zee8o685PUrL9C6oC2m6TayKuJQHhN8iLgG8kI"
21+
}, {
22+
"featureId": "cl6xnzjpq0dms07yobwv68gxf",
23+
"schemaId": "cl6xnuwt95lqq07330tbb3mfd",
24+
"color": "#1CE6FF",
25+
"title": "boxy",
26+
"value": "boxy",
27+
"bbox": {
28+
"top": 162.73,
29+
"left": 32.45,
30+
"height": 388.17,
31+
"width": 101.66
32+
},
33+
"page": 4,
34+
"unit": "POINTS",
35+
"instanceURI": "https://api.labelbox.com/masks/feature/cl6xnzjpq0dms07yobwv68gxf?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2NjOWZtbXc0aGNkMDczOHFpeWM2YW54Iiwib3JnYW5pemF0aW9uSWQiOiJja2N6NmJ1YnVkeWZpMDg1NW8xZHQxZzlzIiwiaWF0IjoxNjYwNzYxNDUwLCJleHAiOjE2NjMzNTM0NTB9.X4-j6zee8o685PUrL9C6oC2m6TayKuJQHhN8iLgG8kI"
36+
}, {
37+
"featureId": "cl6xnzjpq0dmt07yo8pp45gru",
38+
"schemaId": "cl6xnuwt95lqq07330tbb3mfd",
39+
"color": "#1CE6FF",
40+
"title": "boxy",
41+
"value": "boxy",
42+
"bbox": {
43+
"top": 223.26,
44+
"left": 251.42,
45+
"height": 457.04,
46+
"width": 186.78
47+
},
48+
"page": 7,
49+
"unit": "POINTS",
50+
"instanceURI": "https://api.labelbox.com/masks/feature/cl6xnzjpq0dmt07yo8pp45gru?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2NjOWZtbXc0aGNkMDczOHFpeWM2YW54Iiwib3JnYW5pemF0aW9uSWQiOiJja2N6NmJ1YnVkeWZpMDg1NW8xZHQxZzlzIiwiaWF0IjoxNjYwNzYxNDUwLCJleHAiOjE2NjMzNTM0NTB9.X4-j6zee8o685PUrL9C6oC2m6TayKuJQHhN8iLgG8kI"
51+
}, {
52+
"featureId": "cl6xnzjpq0dmu07yo2qik0en4",
53+
"schemaId": "cl6xnuwt95lqq07330tbb3mfd",
54+
"color": "#1CE6FF",
55+
"title": "boxy",
56+
"value": "boxy",
57+
"bbox": {
58+
"top": 32.52,
59+
"left": 218.17,
60+
"height": 231.73,
61+
"width": 110.56
62+
},
63+
"page": 6,
64+
"unit": "POINTS",
65+
"instanceURI": "https://api.labelbox.com/masks/feature/cl6xnzjpq0dmu07yo2qik0en4?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2NjOWZtbXc0aGNkMDczOHFpeWM2YW54Iiwib3JnYW5pemF0aW9uSWQiOiJja2N6NmJ1YnVkeWZpMDg1NW8xZHQxZzlzIiwiaWF0IjoxNjYwNzYxNDUwLCJleHAiOjE2NjMzNTM0NTB9.X4-j6zee8o685PUrL9C6oC2m6TayKuJQHhN8iLgG8kI"
66+
}, {
67+
"featureId": "cl6xnzjpq0dmv07yo7phz7ofz",
68+
"schemaId": "cl6xnuwt95lqq07330tbb3mfd",
69+
"color": "#1CE6FF",
70+
"title": "boxy",
71+
"value": "boxy",
72+
"bbox": {
73+
"top": 117.39,
74+
"left": 4.25,
75+
"height": 456.92,
76+
"width": 164.83
77+
},
78+
"page": 7,
79+
"unit": "POINTS",
80+
"instanceURI": "https://api.labelbox.com/masks/feature/cl6xnzjpq0dmv07yo7phz7ofz?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2NjOWZtbXc0aGNkMDczOHFpeWM2YW54Iiwib3JnYW5pemF0aW9uSWQiOiJja2N6NmJ1YnVkeWZpMDg1NW8xZHQxZzlzIiwiaWF0IjoxNjYwNzYxNDUwLCJleHAiOjE2NjMzNTM0NTB9.X4-j6zee8o685PUrL9C6oC2m6TayKuJQHhN8iLgG8kI"
81+
}, {
82+
"featureId": "cl6xnzjpq0dmw07yofocp6uf6",
83+
"schemaId": "cl6xnuwt95lqq07330tbb3mfd",
84+
"color": "#1CE6FF",
85+
"title": "boxy",
86+
"value": "boxy",
87+
"bbox": {
88+
"top": 82.13,
89+
"left": 217.28,
90+
"height": 279.76,
91+
"width": 82.43
92+
},
93+
"page": 8,
94+
"unit": "POINTS",
95+
"instanceURI": "https://api.labelbox.com/masks/feature/cl6xnzjpq0dmw07yofocp6uf6?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2NjOWZtbXc0aGNkMDczOHFpeWM2YW54Iiwib3JnYW5pemF0aW9uSWQiOiJja2N6NmJ1YnVkeWZpMDg1NW8xZHQxZzlzIiwiaWF0IjoxNjYwNzYxNDUwLCJleHAiOjE2NjMzNTM0NTB9.X4-j6zee8o685PUrL9C6oC2m6TayKuJQHhN8iLgG8kI"
96+
}, {
97+
"featureId": "cl6xnzjpq0dmx07yo0qh40z0n",
98+
"schemaId": "cl6xnuwt95lqq07330tbb3mfd",
99+
"color": "#1CE6FF",
100+
"title": "boxy",
101+
"value": "boxy",
102+
"bbox": {
103+
"top": 298.12,
104+
"left": 83.34,
105+
"height": 203.83,
106+
"width": 0.38
107+
},
108+
"page": 3,
109+
"unit": "POINTS",
110+
"instanceURI": "https://api.labelbox.com/masks/feature/cl6xnzjpq0dmx07yo0qh40z0n?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2NjOWZtbXc0aGNkMDczOHFpeWM2YW54Iiwib3JnYW5pemF0aW9uSWQiOiJja2N6NmJ1YnVkeWZpMDg1NW8xZHQxZzlzIiwiaWF0IjoxNjYwNzYxNDUwLCJleHAiOjE2NjMzNTM0NTB9.X4-j6zee8o685PUrL9C6oC2m6TayKuJQHhN8iLgG8kI"
111+
}],
112+
"classifications": [],
113+
"relationships": []
114+
},
115+
"Created By": "jtso@labelbox.com",
116+
"Project Name": "PDF MAL Test",
117+
"Created At": "2022-08-17T18:37:18.000Z",
118+
"Updated At": "2022-08-17T18:37:20.073Z",
119+
"Seconds to Label": 15.003,
120+
"External ID": "Airbnb_ Labelbox - Focus on Workforce - Labelbox Labeling Operations (1).pdf",
121+
"Global Key": null,
122+
"Agreement": -1,
123+
"Benchmark Agreement": -1,
124+
"Benchmark ID": null,
125+
"Dataset Name": "PDF ",
126+
"Reviews": [],
127+
"View Label": "https://editor.labelbox.com?project=cl6xntneb7t28072bggdydv7a&label=cl6xnzi4a7ldn0729381g7104",
128+
"Has Open Issues": 0,
129+
"Skipped": false
130+
}]
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import json
2+
from typing import Dict, Any
3+
4+
from labelbox.data.serialization.labelbox_v1.converter import LBV1Converter
5+
6+
# Top-level keys stripped from the serialized output in test_pdf before it is
# compared against the exported payload.
IGNORE_KEYS = [
    "Data Split",
    "media_type",
    "DataRow Metadata",
    "Media Attributes",
]
9+
10+
11+
def round_dict(data: Dict[str, Any]) -> Dict[str, Any]:
    """Convert every float value in ``data`` to an int, recursing into dicts.

    Despite the name, values are converted with ``int()``, which truncates
    toward zero rather than rounding to the nearest integer. Only values that
    are floats or dicts are touched; anything else (including lists) is left
    as is.

    Args:
        data: Mapping to normalise. It is modified in place.

    Returns:
        The same ``data`` object that was passed in.
    """
    # Only existing keys are reassigned, so the dict's size never changes
    # while it is being iterated.
    for name, item in data.items():
        if isinstance(item, float):
            data[name] = int(item)
        elif isinstance(item, dict):
            data[name] = round_dict(item)
    return data
18+
19+
20+
def test_pdf():
    """
    Tests an export from a pdf document with only bounding boxes.

    Deserializes the exported payload, serializes it back and checks that the
    round trip reproduces the original export, ignoring the keys listed in
    IGNORE_KEYS and the per-object 'classifications' entry.
    """
    # Use a context manager so the fixture's file handle is closed as soon as
    # the JSON has been parsed instead of being left open.
    with open('tests/data/assets/labelbox_v1/pdf_export.json', 'r') as file:
        payload = json.load(file)
    collection = LBV1Converter.deserialize(payload)
    serialized = next(LBV1Converter.serialize(collection))

    payload = payload[0]  # only one document in the export

    serialized = {k: v for k, v in serialized.items() if k not in IGNORE_KEYS}

    assert serialized.keys() == payload.keys()
    for key in payload:
        if key == 'Label':
            # Drop 'classifications' from each serialized object before
            # comparing against the payload objects.
            serialized_no_classes = [{
                k: v for k, v in dic.items() if k != 'classifications'
            } for dic in serialized[key]['objects']]
            # Floats are converted to ints on both sides before comparing
            # (presumably to tolerate float precision differences).
            # NOTE: round_dict modifies the payload objects in place.
            serialized_round = [
                round_dict(dic) for dic in serialized_no_classes
            ]
            payload_round = [round_dict(dic) for dic in payload[key]['objects']]
            assert payload_round == serialized_round
        else:
            assert serialized[key] == payload[key]

tests/data/serialization/labelbox_v1/test_image.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ def test_image(file_path):
2222

2323
# We are storing the media types now.
2424
payload['media_type'] = 'image'
25+
payload['Global Key'] = None
2526

2627
assert serialized.keys() == payload.keys()
2728

@@ -31,6 +32,8 @@ def test_image(file_path):
3132
elif key == 'Label':
3233
for annotation_a, annotation_b in zip(serialized[key]['objects'],
3334
payload[key]['objects']):
35+
annotation_b['page'] = None
36+
annotation_b['unit'] = None
3437
if not len(annotation_a['classifications']):
3538
# We don't add a classification key to the payload if there are no classifications.
3639
annotation_a.pop('classifications')

tests/data/serialization/labelbox_v1/test_text.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ def test_text():
1010
serialized = next(LBV1Converter.serialize(collection))
1111

1212
payload['media_type'] = 'text'
13+
payload['Global Key'] = None
1314

1415
assert serialized.keys() == payload.keys()
1516
for key in serialized:
@@ -18,6 +19,8 @@ def test_text():
1819
elif key == 'Label':
1920
for annotation_a, annotation_b in zip(serialized[key]['objects'],
2021
payload[key]['objects']):
22+
annotation_b['page'] = None
23+
annotation_b['unit'] = None
2124
if not len(annotation_a['classifications']):
2225
# We don't add a classification key to the payload if there are no classifications.
2326
annotation_a.pop('classifications')

tests/data/serialization/labelbox_v1/test_unknown_media.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ def test_image():
1616

1717
for row in payload:
1818
row['media_type'] = 'image'
19+
row['Global Key'] = None
1920

2021
collection = LBV1Converter.deserialize(payload)
2122
for idx, serialized in enumerate(LBV1Converter.serialize(collection)):
@@ -30,6 +31,8 @@ def test_image():
3031
if not len(annotation_a['classifications']):
3132
# We don't add a classification key to the payload if there are no classifications.
3233
annotation_a.pop('classifications')
34+
annotation_b['page'] = None
35+
annotation_b['unit'] = None
3336

3437
if isinstance(annotation_b.get('classifications'),
3538
list) and len(

tests/data/serialization/labelbox_v1/test_video.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ def test_video():
1818
collection = LBV1Converter.deserialize([payload])
1919
serialized = next(LBV1Converter.serialize(collection))
2020
payload['media_type'] = 'video'
21+
payload['Global Key'] = None
2122
assert serialized.keys() == payload.keys()
2223
for key in serialized:
2324
if key != 'Label':
@@ -32,6 +33,8 @@ def test_video():
3233

3334
for obj_a, obj_b in zip(annotation_a['objects'],
3435
annotation_b['objects']):
36+
obj_b['page'] = None
37+
obj_b['unit'] = None
3538
obj_a = round_dict(obj_a)
3639
obj_b = round_dict(obj_b)
3740
assert obj_a == obj_b

0 commit comments

Comments
 (0)