Commit 11793a4

[AL-5103] Add new data type classes to annotation types
2 parents e96dd3c + 7fe170d commit 11793a4

File tree

10 files changed, +250 -23 lines

labelbox/data/annotation_types/__init__.py

Lines changed: 5 additions & 0 deletions

@@ -20,6 +20,11 @@
 from .classification import Radio
 from .classification import Text
 
+from .data import AudioData
+from .data import ConversationData
+from .data import DicomData
+from .data import DocumentData
+from .data import HTMLData
 from .data import ImageData
 from .data import MaskData
 from .data import TextData

labelbox/data/annotation_types/data/__init__.py

Lines changed: 6 additions & 1 deletion

@@ -1,4 +1,9 @@
+from .audio import AudioData
+from .conversation import ConversationData
+from .dicom import DicomData
+from .document import DocumentData
+from .html import HTMLData
 from .raster import ImageData
 from .raster import MaskData
 from .text import TextData
-from .video import VideoData
+from .video import VideoData

labelbox/data/annotation_types/data/audio.py

Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
+from .base_data import BaseData
+
+
+class AudioData(BaseData):
+    ...

labelbox/data/annotation_types/data/conversation.py

Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
+from .base_data import BaseData
+
+
+class ConversationData(BaseData):
+    ...

labelbox/data/annotation_types/data/dicom.py

Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
+from .base_data import BaseData
+
+
+class DicomData(BaseData):
+    ...

labelbox/data/annotation_types/data/document.py

Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
+from .base_data import BaseData
+
+
+class DocumentData(BaseData):
+    ...

labelbox/data/annotation_types/data/html.py

Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
+from .base_data import BaseData
+
+
+class HTMLData(BaseData):
+    ...
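
With these classes exported from labelbox.data.annotation_types, a label for any of the new media types can be built the same way as for ImageData or TextData. A minimal sketch (not part of this commit, mirroring the integration test added below; the uid value and the empty annotation list are placeholders):

import labelbox.types as lb_types
from labelbox.data.annotation_types import AudioData

# Attach annotations to an existing data row by referencing its uid.
label = lb_types.Label(
    data=AudioData(uid="<existing-data-row-uid>"),  # placeholder uid
    annotations=[])  # e.g. the output of NDJsonConverter.deserialize(...)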

tests/integration/annotation_import/conftest.py

Lines changed: 168 additions & 5 deletions

@@ -11,6 +11,167 @@
 from labelbox.schema.queue_mode import QueueMode
 
 
+@pytest.fixture()
+def audio_data_row(rand_gen):
+    return {
+        "row_data":
+            "https://storage.googleapis.com/labelbox-datasets/audio-sample-data/sample-audio-1.mp3",
+        "global_key":
+            f"https://storage.googleapis.com/labelbox-datasets/audio-sample-data/sample-audio-1.mp3-{rand_gen(str)}",
+        "media_type":
+            "AUDIO",
+    }
+
+
+@pytest.fixture()
+def conversation_data_row(rand_gen):
+    return {
+        "row_data":
+            "https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/sample-conversation-1.json",
+        "global_key":
+            f"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/sample-conversation-1.json-{rand_gen(str)}",
+        "media_type":
+            "CONVERSATIONAL",
+    }
+
+
+@pytest.fixture()
+def dicom_data_row(rand_gen):
+    return {
+        "row_data":
+            "https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-dicom-1.dcm",
+        "global_key":
+            f"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-dicom-1.dcm-{rand_gen(str)}",
+        "media_type":
+            "DICOM",
+    }
+
+
+@pytest.fixture()
+def geospatial_data_row(rand_gen):
+    return {
+        "row_data": {
+            "tile_layer_url":
+                "https://s3-us-west-1.amazonaws.com/lb-tiler-layers/mexico_city/{z}/{x}/{y}.png",
+            "bounds": [[19.405662413477728, -99.21052827588443],
+                       [19.400498983095076, -99.20534818927473]],
+            "min_zoom":
+                12,
+            "max_zoom":
+                20,
+            "epsg":
+                "EPSG4326",
+        },
+        "global_key":
+            f"https://s3-us-west-1.amazonaws.com/lb-tiler-layers/mexico_city/z/x/y.png-{rand_gen(str)}",
+        "media_type":
+            "TMS_GEO",
+    }
+
+
+@pytest.fixture()
+def html_data_row(rand_gen):
+    return {
+        "row_data":
+            "https://storage.googleapis.com/labelbox-datasets/html_sample_data/sample_html_1.html",
+        "global_key":
+            f"https://storage.googleapis.com/labelbox-datasets/html_sample_data/sample_html_1.html-{rand_gen(str)}",
+    }
+
+
+@pytest.fixture()
+def image_data_row(rand_gen):
+    return {
+        "row_data":
+            "https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg",
+        "global_key":
+            f"https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-{rand_gen(str)}",
+        "media_type":
+            "IMAGE",
+    }
+
+
+@pytest.fixture()
+def document_data_row(rand_gen):
+    return {
+        "row_data": {
+            "pdf_url":
+                "https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf",
+            "text_layer_url":
+                "https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483-lb-textlayer.json"
+        },
+        "global_key":
+            f"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf-{rand_gen(str)}",
+        "media_type":
+            "PDF",
+    }
+
+
+@pytest.fixture()
+def text_data_row(rand_gen):
+    return {
+        "row_data":
+            "https://lb-test-data.s3.us-west-1.amazonaws.com/text-samples/sample-text-1.txt",
+        "global_key":
+            f"https://lb-test-data.s3.us-west-1.amazonaws.com/text-samples/sample-text-1.txt-{rand_gen(str)}",
+        "media_type":
+            "TEXT",
+    }
+
+
+@pytest.fixture()
+def video_data_row(rand_gen):
+    return {
+        "row_data":
+            "https://storage.googleapis.com/labelbox-datasets/video-sample-data/sample-video-1.mp4",
+        "global_key":
+            f"https://storage.googleapis.com/labelbox-datasets/video-sample-data/sample-video-1.mp4-{rand_gen(str)}",
+        "media_type":
+            "VIDEO",
+    }
+
+
+@pytest.fixture
+def data_row_json_by_data_type(audio_data_row, conversation_data_row,
+                               dicom_data_row, geospatial_data_row,
+                               html_data_row, image_data_row, document_data_row,
+                               text_data_row, video_data_row):
+    return {
+        'audio': audio_data_row,
+        'conversation': conversation_data_row,
+        'dicom': dicom_data_row,
+        'geospatial': geospatial_data_row,
+        'html': html_data_row,
+        'image': image_data_row,
+        'document': document_data_row,
+        'text': text_data_row,
+        'video': video_data_row,
+    }
+
+
+@pytest.fixture
+def annotations_by_data_type(polygon_inference, rectangle_inference,
+                             line_inference, entity_inference,
+                             checklist_inference, text_inference,
+                             video_checklist_inference):
+    return {
+        'audio': [checklist_inference, text_inference],
+        'conversation': [checklist_inference, text_inference, entity_inference],
+        'dicom': [line_inference],
+        'document': [
+            entity_inference, checklist_inference, text_inference,
+            rectangle_inference
+        ],
+        'html': [text_inference, checklist_inference],
+        'image': [
+            polygon_inference, rectangle_inference, line_inference,
+            checklist_inference, text_inference
+        ],
+        'text': [entity_inference, checklist_inference, text_inference],
+        'video': [video_checklist_inference]
+    }
+
+
 @pytest.fixture
 def ontology():
     bbox_tool = {

@@ -168,10 +329,10 @@ def configured_project_pdf(client, ontology, rand_gen, pdf_url):
 
 
 @pytest.fixture
-def dataset_pdf_entity(client, rand_gen, pdf_entity_data_row):
+def dataset_pdf_entity(client, rand_gen, document_data_row):
     dataset = client.create_dataset(name=rand_gen(str))
     data_row_ids = []
-    data_row = dataset.create_data_row(pdf_entity_data_row)
+    data_row = dataset.create_data_row(document_data_row)
     data_row_ids.append(data_row.uid)
     yield dataset, data_row_ids
     dataset.delete()

@@ -298,11 +459,13 @@ def entity_inference(prediction_id_mapping):
 @pytest.fixture
 def segmentation_inference(prediction_id_mapping):
     segmentation = prediction_id_mapping['superpixel'].copy()
-    segmentation.update(
-        {'mask': {
+    segmentation.update({
+        'mask': {
+            # TODO: Use a real URI
             'instanceURI': "sampleuri",
             'colorRGB': [0, 0, 0]
-        }})
+        }
+    })
     del segmentation['tool']
     return segmentation
 
Lines changed: 46 additions & 0 deletions

@@ -0,0 +1,46 @@
+import pytest
+import labelbox as lb
+import labelbox.types as lb_types
+from labelbox.data.annotation_types.data import AudioData, ConversationData, DicomData, DocumentData, HTMLData, ImageData, TextData
+from labelbox.data.serialization import NDJsonConverter
+from labelbox.schema.annotation_import import AnnotationImportState
+
+
+# TODO: Add VideoData. Currently label import job finishes without errors but project.export_labels() returns empty list.
+@pytest.mark.parametrize('data_type_class', [
+    AudioData, ConversationData, DicomData, DocumentData, HTMLData, ImageData,
+    TextData
+])
+def test_import_data_types(client, configured_project,
+                           data_row_json_by_data_type, annotations_by_data_type,
+                           data_type_class):
+
+    project_id = configured_project.uid
+
+    data_type_string = data_type_class.__name__[:-4].lower()
+    data_row_ndjson = data_row_json_by_data_type[data_type_string]
+    dataset = next(configured_project.datasets())
+    data_row = dataset.create_data_row(data_row_ndjson)
+
+    annotations_ndjson = annotations_by_data_type[data_type_string]
+    annotations_list = [
+        label.annotations
+        for label in NDJsonConverter.deserialize(annotations_ndjson)
+    ]
+    labels = [
+        lb_types.Label(data=data_type_class(uid=data_row.uid),
+                       annotations=annotations)
+        for annotations in annotations_list
+    ]
+
+    label_import = lb.LabelImport.create_from_objects(
+        client, project_id, f'test-import-{data_type_string}', labels)
+    label_import.wait_until_done()
+
+    assert label_import.state == AnnotationImportState.FINISHED
+    assert len(label_import.errors) == 0
+    exported_labels = configured_project.export_labels(download=True)
+    objects = exported_labels[0]['Label']['objects']
+    classifications = exported_labels[0]['Label']['classifications']
+    assert len(objects) + len(classifications) == len(labels)
+    data_row.delete()
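
A side note on the parametrization above: the test derives the fixture keys by stripping the trailing "Data" suffix from each class name and lowercasing it, for example:

AudioData.__name__[:-4].lower()         # -> 'audio'
ConversationData.__name__[:-4].lower()  # -> 'conversation'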

tests/integration/conftest.py

Lines changed: 0 additions & 17 deletions

@@ -177,23 +177,6 @@ def pdf_url(client):
     return {"row_data": {"pdf_url": pdf_url,}, "global_key": str(uuid.uuid4())}
 
 
-@pytest.fixture(scope="session")
-def pdf_entity_data_row(client):
-    pdf_url = client.upload_file(
-        'tests/assets/arxiv-pdf_data_99-word-token-pdfs_0801.3483.pdf')
-    text_layer_url = client.upload_file(
-        'tests/assets/arxiv-pdf_data_99-word-token-pdfs_0801.3483-lb-textlayer.json'
-    )
-
-    return {
-        "row_data": {
-            "pdf_url": pdf_url,
-            "text_layer_url": text_layer_url
-        },
-        "global_key": str(uuid.uuid4())
-    }
-
-
 @pytest.fixture
 def project(client, rand_gen):
     project = client.create_project(name=rand_gen(str),
