Skip to content

Commit acdd0b3

Browse files
committed
ability to use ndjson converter with pdf and test updates
1 parent f11ac19 commit acdd0b3

File tree

12 files changed

+292
-10
lines changed

12 files changed

+292
-10
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
* Resets model run training metadata
99
* `ModelRun.get_config()`
1010
* Fetches model run training metadata
11+
* Support for document (pdf) de/serialization from exports
12+
* Use the `LBV1Converter.serialize()` and `LBV1Converter.deserialize()` methods
13+
* Support for document (pdf) de/serialization for imports
14+
* Use the `NDJsonConverter.serialize()` and `NDJsonConverter.deserialize()` methods
1115

1216
### Changed
1317
* `Model.create_model_run()`

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM python:3.7
1+
FROM python:3.8
22

33
RUN pip install pytest pytest-cases pytest-rerunfailures
44
RUN apt-get -y update

labelbox/data/serialization/ndjson/objects.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from ast import Bytes
22
from io import BytesIO
3-
from typing import Any, Dict, List, Tuple, Union
3+
from typing import Any, Dict, List, Tuple, Union, Optional
44
import base64
55
import numpy as np
66

@@ -21,6 +21,8 @@
2121

2222
class NDBaseObject(NDAnnotation):
2323
classifications: List[NDSubclassificationType] = []
24+
page: Optional[int] = None
25+
unit: Optional[str] = None
2426

2527

2628
class VideoSupported(BaseModel):
@@ -167,7 +169,9 @@ def from_common(cls, rectangle: Rectangle,
167169
name=name,
168170
schema_id=feature_schema_id,
169171
uuid=extra.get('uuid'),
170-
classifications=classifications)
172+
classifications=classifications,
173+
page=extra.get('page'),
174+
unit=extra.get('unit'))
171175

172176

173177
class NDFrameRectangle(VideoSupported):
@@ -352,7 +356,11 @@ def to_common(annotation: "NDObjectType") -> ObjectAnnotation:
352356
name=annotation.name,
353357
feature_schema_id=annotation.schema_id,
354358
classifications=classifications,
355-
extra={'uuid': annotation.uuid})
359+
extra={
360+
'uuid': annotation.uuid,
361+
'page': annotation.page,
362+
'unit': annotation.unit
363+
})
356364

357365
@classmethod
358366
def from_common(
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
[{
2+
"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4",
3+
"dataRow": {
4+
"id": "cl6xnv9h61fv0085yhtoq06ht"
5+
},
6+
"name": "boxy",
7+
"schemaId": "cl6xnuwt95lqq07330tbb3mfd",
8+
"classifications": [],
9+
"page": 4,
10+
"unit": "POINTS",
11+
"bbox": {
12+
"top": 162.73,
13+
"left": 32.45,
14+
"height": 388.16999999999996,
15+
"width": 101.66000000000001
16+
}
17+
}, {
18+
"uuid": "20eeef88-0294-49b4-a815-86588476bc6f",
19+
"dataRow": {
20+
"id": "cl6xnv9h61fv0085yhtoq06ht"
21+
},
22+
"name": "boxy",
23+
"schemaId": "cl6xnuwt95lqq07330tbb3mfd",
24+
"classifications": [],
25+
"page": 7,
26+
"unit": "POINTS",
27+
"bbox": {
28+
"top": 223.26,
29+
"left": 251.42,
30+
"height": 457.03999999999996,
31+
"width": 186.78
32+
}
33+
}, {
34+
"uuid": "641a8944-3938-409c-b4eb-dea354ed06e5",
35+
"dataRow": {
36+
"id": "cl6xnv9h61fv0085yhtoq06ht"
37+
},
38+
"name": "boxy",
39+
"schemaId": "cl6xnuwt95lqq07330tbb3mfd",
40+
"classifications": [],
41+
"page": 6,
42+
"unit": "POINTS",
43+
"bbox": {
44+
"top": 32.52,
45+
"left": 218.17,
46+
"height": 231.73,
47+
"width": 110.56000000000003
48+
}
49+
}, {
50+
"uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c",
51+
"dataRow": {
52+
"id": "cl6xnv9h61fv0085yhtoq06ht"
53+
},
54+
"name": "boxy",
55+
"schemaId": "cl6xnuwt95lqq07330tbb3mfd",
56+
"classifications": [],
57+
"page": 7,
58+
"unit": "POINTS",
59+
"bbox": {
60+
"top": 117.39,
61+
"left": 4.25,
62+
"height": 456.9200000000001,
63+
"width": 164.83
64+
}
65+
}, {
66+
"uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63",
67+
"dataRow": {
68+
"id": "cl6xnv9h61fv0085yhtoq06ht"
69+
},
70+
"name": "boxy",
71+
"schemaId": "cl6xnuwt95lqq07330tbb3mfd",
72+
"classifications": [],
73+
"page": 8,
74+
"unit": "POINTS",
75+
"bbox": {
76+
"top": 82.13,
77+
"left": 217.28,
78+
"height": 279.76,
79+
"width": 82.43000000000004
80+
}
81+
}, {
82+
"uuid": "1b009654-bc17-42a2-8a71-160e7808c403",
83+
"dataRow": {
84+
"id": "cl6xnv9h61fv0085yhtoq06ht"
85+
},
86+
"name": "boxy",
87+
"schemaId": "cl6xnuwt95lqq07330tbb3mfd",
88+
"classifications": [],
89+
"page": 3,
90+
"unit": "POINTS",
91+
"bbox": {
92+
"top": 298.12,
93+
"left": 83.34,
94+
"height": 203.83000000000004,
95+
"width": 0.37999999999999545
96+
}
97+
}]
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
[{
2+
"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4",
3+
"dataRow": {
4+
"id": "cl6xnv9h61fv0085yhtoq06ht"
5+
},
6+
"name": "boxy",
7+
"classifications": [],
8+
"page": 4,
9+
"unit": "POINTS",
10+
"bbox": {
11+
"top": 162.73,
12+
"left": 32.45,
13+
"height": 388.16999999999996,
14+
"width": 101.66000000000001
15+
}
16+
}, {
17+
"uuid": "20eeef88-0294-49b4-a815-86588476bc6f",
18+
"dataRow": {
19+
"id": "cl6xnv9h61fv0085yhtoq06ht"
20+
},
21+
"name": "boxy",
22+
"classifications": [],
23+
"page": 7,
24+
"unit": "POINTS",
25+
"bbox": {
26+
"top": 223.26,
27+
"left": 251.42,
28+
"height": 457.03999999999996,
29+
"width": 186.78
30+
}
31+
}, {
32+
"uuid": "641a8944-3938-409c-b4eb-dea354ed06e5",
33+
"dataRow": {
34+
"id": "cl6xnv9h61fv0085yhtoq06ht"
35+
},
36+
"name": "boxy",
37+
"classifications": [],
38+
"page": 6,
39+
"unit": "POINTS",
40+
"bbox": {
41+
"top": 32.52,
42+
"left": 218.17,
43+
"height": 231.73,
44+
"width": 110.56000000000003
45+
}
46+
}, {
47+
"uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c",
48+
"dataRow": {
49+
"id": "cl6xnv9h61fv0085yhtoq06ht"
50+
},
51+
"name": "boxy",
52+
"classifications": [],
53+
"page": 7,
54+
"unit": "POINTS",
55+
"bbox": {
56+
"top": 117.39,
57+
"left": 4.25,
58+
"height": 456.9200000000001,
59+
"width": 164.83
60+
}
61+
}, {
62+
"uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63",
63+
"dataRow": {
64+
"id": "cl6xnv9h61fv0085yhtoq06ht"
65+
},
66+
"name": "boxy",
67+
"classifications": [],
68+
"page": 8,
69+
"unit": "POINTS",
70+
"bbox": {
71+
"top": 82.13,
72+
"left": 217.28,
73+
"height": 279.76,
74+
"width": 82.43000000000004
75+
}
76+
}, {
77+
"uuid": "1b009654-bc17-42a2-8a71-160e7808c403",
78+
"dataRow": {
79+
"id": "cl6xnv9h61fv0085yhtoq06ht"
80+
},
81+
"name": "boxy",
82+
"classifications": [],
83+
"page": 3,
84+
"unit": "POINTS",
85+
"bbox": {
86+
"top": 298.12,
87+
"left": 83.34,
88+
"height": 203.83000000000004,
89+
"width": 0.37999999999999545
90+
}
91+
}]

tests/data/serialization/ndjson/test_classification.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,18 @@
44
from labelbox.data.serialization.ndjson.converter import NDJsonConverter
55
from labelbox.data.serialization.ndjson.objects import NDLine
66

7+
IGNORE_KEYS = ['unit', 'page']
8+
79

810
def test_classification():
911
with open('tests/data/assets/ndjson/classification_import.json',
1012
'r') as file:
1113
data = json.load(file)
1214
res = NDJsonConverter.deserialize(data).as_list()
1315
res = list(NDJsonConverter.serialize(res))
16+
for r in res:
17+
for key in IGNORE_KEYS:
18+
r.pop(key, None)
1419
assert res == data
1520

1621

@@ -20,4 +25,7 @@ def test_classification_with_name():
2025
data = json.load(file)
2126
res = NDJsonConverter.deserialize(data).as_list()
2227
res = list(NDJsonConverter.serialize(res))
28+
for r in res:
29+
for key in IGNORE_KEYS:
30+
r.pop(key, None)
2331
assert res == data
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import json
2+
3+
from labelbox.data.serialization.ndjson.converter import NDJsonConverter
4+
5+
6+
def round_dict(data):
    """
    Convert every float found in a (possibly nested) dict to an int.

    Used to compare serialized payloads without being sensitive to
    fractional differences in coordinates.

    Note that ``int()`` truncates toward zero; it does not round to nearest.
    The dict is modified in place and also returned. Lists and tuples are
    replaced by new lists. Anything that is not a dict is returned unchanged.

    Args:
        data: The value to normalize, normally a dict loaded from json.

    Returns:
        The same ``data`` object, with float values converted to ints.
    """
    if isinstance(data, dict):
        # Only existing keys are reassigned, so the dict size never changes
        # while it is being iterated.
        for key in data:
            data[key] = _truncate_value(data[key])

    return data


def _truncate_value(value):
    """Return ``value`` with floats truncated to int, recursing into containers."""
    if isinstance(value, float):
        return int(value)
    if isinstance(value, dict):
        return round_dict(value)
    if isinstance(value, (list, tuple)):
        # Floats and nested lists directly inside a list must be handled here;
        # round_dict() alone would hand them back untouched.
        return [_truncate_value(item) for item in value]
    return value
17+
18+
19+
def test_pdf():
    """
    Tests a pdf file with bbox annotations only.

    The import payload is deserialized and serialized again, and the result
    must match the original payload once floats are truncated.
    """
    # The context manager closes the file; no explicit close() is needed.
    with open('tests/data/assets/ndjson/pdf_import.json', 'r') as f:
        data = json.load(f)
    res = NDJsonConverter.deserialize(data).as_list()
    res = list(NDJsonConverter.serialize(res))
    assert [round_dict(x) for x in res] == [round_dict(x) for x in data]
29+
30+
31+
def test_pdf_with_name_only():
    """
    Tests a pdf file with bbox annotations only, using the name-only fixture.

    The import payload is deserialized and serialized again, and the result
    must match the original payload once floats are truncated.
    """
    # The context manager closes the file; no explicit close() is needed.
    with open('tests/data/assets/ndjson/pdf_import_name_only.json', 'r') as f:
        data = json.load(f)
    res = NDJsonConverter.deserialize(data).as_list()
    res = list(NDJsonConverter.serialize(res))
    assert [round_dict(x) for x in res] == [round_dict(x) for x in data]

tests/data/serialization/ndjson/test_export_video_objects.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,10 @@ def video_serialized_bbox_label():
585585
}
586586

587587

588+
# Ignore uuid because one is randomly generated when none was provided; page and unit are also skipped in the comparison
589+
IGNORE_KEYS = ["uuid", "page", "unit"]
590+
591+
588592
def test_serialize_video_objects():
589593
label = video_bbox_label()
590594
serialized_labels = NDJsonConverter.serialize([label])
@@ -593,8 +597,7 @@ def test_serialize_video_objects():
593597
manual_label = video_serialized_bbox_label()
594598

595599
for key in label.keys():
596-
#ignore uuid because we randomize if there was none
597-
if key != "uuid":
600+
if key not in IGNORE_KEYS:
598601
assert label[key] == manual_label[key]
599602

600603
assert len(label['segments']) == 2

tests/data/serialization/ndjson/test_image.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from labelbox.data.serialization.ndjson.converter import NDJsonConverter
66
from labelbox.data.annotation_types import Mask, Label, ObjectAnnotation, ImageData, MaskData
77

8+
IGNORE_KEYS = ['classifications', 'unit', 'page']
9+
810

911
def round_dict(data):
1012
if isinstance(data, dict):
@@ -26,7 +28,8 @@ def test_image():
2628
res = NDJsonConverter.deserialize(data).as_list()
2729
res = list(NDJsonConverter.serialize(res))
2830
for r in res:
29-
r.pop('classifications', None)
31+
for key in IGNORE_KEYS:
32+
r.pop(key, None)
3033
assert [round_dict(x) for x in res] == [round_dict(x) for x in data]
3134

3235

@@ -38,7 +41,8 @@ def test_image_with_name_only():
3841
res = NDJsonConverter.deserialize(data).as_list()
3942
res = list(NDJsonConverter.serialize(res))
4043
for r in res:
41-
r.pop('classifications', None)
44+
for key in IGNORE_KEYS:
45+
r.pop(key, None)
4246
assert [round_dict(x) for x in res] == [round_dict(x) for x in data]
4347

4448

@@ -68,7 +72,8 @@ def test_mask():
6872
res = NDJsonConverter.deserialize(data).as_list()
6973
res = list(NDJsonConverter.serialize(res))
7074
for r in res:
71-
r.pop('classifications', None)
75+
for key in IGNORE_KEYS:
76+
r.pop(key, None)
7277

7378
assert [round_dict(x) for x in res] == [round_dict(x) for x in data]
7479

@@ -94,5 +99,7 @@ def test_mask_from_arr():
9499
"mask": {
95100
"png":
96101
"iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAAAAABWESUoAAAAHklEQVR4nGNgGAKAEYn8j00BEyETBoOCUTAKhhwAAJW+AQwvpePVAAAAAElFTkSuQmCC"
97-
}
102+
},
103+
"page": None,
104+
"unit": None
98105
}

0 commit comments

Comments
 (0)