Skip to content

Commit ee2d55c

Browse files
author
Kevin Kim
committed
First commit for supporting globalKeys in annotation import SDK
1 parent 606ea8e commit ee2d55c

File tree

5 files changed

+101
-1
lines changed

5 files changed

+101
-1
lines changed

labelbox/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from labelbox.schema.project import Project
66
from labelbox.schema.model import Model
77
from labelbox.schema.bulk_import_request import BulkImportRequest
8-
from labelbox.schema.annotation_import import MALPredictionImport, MEAPredictionImport, LabelImport
8+
from labelbox.schema.annotation_import import MALPredictionImport, MEAPredictionImport, LabelImport, MEAToMALPredictionImport
99
from labelbox.schema.dataset import Dataset
1010
from labelbox.schema.data_row import DataRow
1111
from labelbox.schema.label import Label

labelbox/data/annotation_types/data/base_data.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,6 @@ class BaseData(BaseModel, ABC):
1111
"""
1212
external_id: Optional[str] = None
1313
uid: Optional[str] = None
14+
global_key: Optional[str] = None
1415
media_attributes: Optional[Dict[str, Any]] = None
1516
metadata: Optional[List[Dict[str, Any]]] = None

labelbox/schema/annotation_import.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,8 @@ def _get_ndjson_from_objects(cls, objects: Union[List[Dict[str, Any]],
155155
)
156156

157157
objects = serialize_labels(objects)
158+
# cls.validate_data_rows(objects)
159+
158160
data_str = ndjson.dumps(objects)
159161
if not data_str:
160162
raise ValueError(f"{object_name} cannot be empty")
@@ -171,6 +173,36 @@ def refresh(self) -> None:
171173
as_json=True)
172174
self._set_field_values(res)
173175

176+
@classmethod
def validate_data_rows(cls,
                       objects: List[Dict[str, Any]],
                       max_num_errors: int = 100) -> None:
    """
    Validates the 'dataRow' entry of every serialized annotation.

    An annotation is reported as invalid when:
      * it has no 'dataRow' key, or
      * its 'dataRow' contains both 'id' and 'globalKey'.

    A 'dataRow' holding only one of the two keys passes validation.

    Args:
        objects: Annotation dictionaries to check.
        max_num_errors: Maximum number of individual error messages to
            include in the raised exception (non-negative). The total
            number of invalid annotations is always reported.

    Raises:
        ValueError: If at least one annotation is invalid. The message
            contains the total error count followed by up to
            `max_num_errors` individual error messages.
    """
    shown_errors: List[str] = []
    num_errors = 0

    for annotation in objects:
        if 'dataRow' not in annotation:
            problem = "'dataRow' is missing in"
        elif ('id' in annotation['dataRow'] and
              'globalKey' in annotation['dataRow']):
            problem = ("Must provide only one of 'id' or 'globalKey' "
                       "for 'dataRow' in")
        else:
            continue

        num_errors += 1
        # Only format the messages that will actually be shown; each one
        # embeds the whole annotation and can be large.
        if len(shown_errors) < max_num_errors:
            shown_errors.append(f"{problem} {annotation}")

    if not num_errors:
        return

    if num_errors > max_num_errors:
        logger.warning(
            f"Found more than {max_num_errors} errors. Showing first {max_num_errors} error messages..."
        )

    formatted_errors = '\n'.join(shown_errors)
    raise ValueError(
        f"Error while validating annotations. Found {num_errors} annotations with errors. Errors:\n{formatted_errors}"
    )
205+
174206
@classmethod
175207
def from_name(cls,
176208
client: "labelbox.Client",

tests/integration/annotation_import/test_label_import.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,41 @@ def test_create_from_objects(client, configured_project, object_predictions,
3838
label_import.input_file_url, object_predictions)
3939

4040

41+
def test_data_row_validation_errors(client, configured_project,
                                    object_predictions):
    """Label import must reject annotations with a bad 'dataRow' entry.

    Two of the provided predictions are made invalid (one without
    'dataRow', one with both 'id' and 'globalKey') and two are rewritten
    into the valid single-key forms; exactly two errors are expected.
    """
    name = str(uuid.uuid4())

    # Invalid: remove the 'dataRow' part entirely
    del object_predictions[0]['dataRow']

    # Invalid: both 'id' and 'globalKey' are set
    object_predictions[1]['dataRow'] = {
        'id': 'some id',
        'globalKey': 'some global key'
    }

    # Valid: 'id' only
    object_predictions[2]['dataRow'] = {
        'id': 'some id',
    }

    # Valid: 'globalKey' only
    object_predictions[3]['dataRow'] = {
        'globalKey': 'some global key',
    }

    # The call is expected to raise, so its result is deliberately not
    # bound to a name.
    with pytest.raises(ValueError) as exc_info:
        LabelImport.create_from_objects(client=client,
                                        project_id=configured_project.uid,
                                        name=name,
                                        labels=object_predictions)

    exception_str = str(exc_info.value)
    assert "Found 2 annotations with errors" in exception_str
    assert "'dataRow' is missing in" in exception_str
    assert "Must provide only one of 'id' or 'globalKey' for 'dataRow'" in exception_str
75+
4176
def test_create_from_label_objects(client, configured_project,
4277
object_predictions,
4378
annotation_import_test_helpers):

tests/integration/annotation_import/test_mea_prediction_import.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,38 @@
1111
"""
1212

1313

14+
def test_data_row_validation_errors(model_run_with_all_project_labels,
                                    object_predictions):
    """Adding predictions must fail when 'dataRow' entries are invalid.

    The first prediction loses its 'dataRow', the second gets both 'id'
    and 'globalKey'; the third and fourth get valid single-key entries.
    """
    # Invalid: no 'dataRow' at all
    del object_predictions[0]['dataRow']

    replacement_data_rows = {
        # Invalid: both identifiers at once
        1: {
            'id': 'some id',
            'globalKey': 'some global key'
        },
        # Valid: 'id' only
        2: {
            'id': 'some id',
        },
        # Valid: 'globalKey' only
        3: {
            'globalKey': 'some global key',
        },
    }
    for index, data_row in replacement_data_rows.items():
        object_predictions[index]['dataRow'] = data_row

    import_name = str(uuid.uuid4())
    with pytest.raises(ValueError) as exc_info:
        model_run_with_all_project_labels.add_predictions(
            name=import_name, predictions=object_predictions)

    message = str(exc_info.value)
    expected_fragments = (
        "Found 2 annotations with errors",
        "'dataRow' is missing in",
        "Must provide only one of 'id' or 'globalKey' for 'dataRow'",
    )
    for fragment in expected_fragments:
        assert fragment in message
45+
1446
def test_create_from_url(model_run_with_model_run_data_rows,
1547
annotation_import_test_helpers):
1648
name = str(uuid.uuid4())

0 commit comments

Comments
 (0)