Skip to content

Commit 4ac94f3

Browse files
committed
fix validation, add tests, add comments
1 parent 1aae84c commit 4ac94f3

File tree

7 files changed

+151
-46
lines changed

7 files changed

+151
-46
lines changed

labelbox/data/annotation_types/collection.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,14 @@ def assign_feature_schema_ids(
3434
self, ontology_builder: "ontology.OntologyBuilder") -> "LabelList":
3535
"""
3636
Adds schema ids to all FeatureSchema objects in the Labels.
37-
This is necessary for MAL.
3837
3938
Args:
4039
ontology_builder: The ontology that matches the feature names assigned to objects in this LabelList
4140
Returns:
4241
LabelList. useful for chaining these modifying functions
42+
43+
Warning: assign_feature_schema_ids is now obsolete; you can
44+
now use names directly without having to look up schema_ids.
4345
"""
4446
for label in self._data:
4547
label.assign_feature_schema_ids(ontology_builder)

labelbox/data/annotation_types/label.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,6 @@ def assign_feature_schema_ids(
131131
self, ontology_builder: ontology.OntologyBuilder) -> "Label":
132132
"""
133133
Adds schema ids to all FeatureSchema objects in the Labels.
134-
This is necessary for MAL.
135134
136135
Args:
137136
ontology_builder: The ontology that matches the feature names assigned to objects in this dataset

labelbox/data/serialization/ndjson/base.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ class NDAnnotation(NDJsonBase):
3939
@validator('name', pre=True, always=True)
def validate_name(cls, v, values):
    """Reject an annotation that carries neither a name nor a schema id.

    Args:
        v: Raw ``name`` value supplied by the caller (may be None).
        values: Fields that were validated before ``name``.

    Returns:
        The ``name`` value, unchanged.

    Raises:
        ValueError: If ``name`` is None and ``schema_id`` is not in ``values``.
    """
    if v is None and 'schema_id' not in values:
        raise ValueError(
            "Name and schema_id are not set. Either set name or schema_id.")
    # A pydantic validator's return value becomes the field value; without
    # this return every supplied name would silently be replaced by None.
    return v
4344

4445
@validator('schema_id', pre=True, always=True)
4546
def validate_id(cls, v, values):

labelbox/data/serialization/ndjson/classification.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ class NDFeature(BaseModel):
1717
@validator('name', pre=True, always=True)
def validate_name(cls, v, values):
    """Reject a feature that carries neither a name nor a schema id.

    Args:
        v: Raw ``name`` value supplied by the caller (may be None).
        values: Fields that were validated before ``name``.

    Returns:
        The ``name`` value, unchanged.

    Raises:
        ValueError: If ``name`` is None and ``schema_id`` is not in ``values``.
    """
    if v is None and 'schema_id' not in values:
        raise ValueError(
            "Name and schema_id are not set. Either set name or schema_id.")
    # A pydantic validator's return value becomes the field value; without
    # this return every supplied name would silently be replaced by None.
    return v
2122

2223
@validator('schema_id', pre=True, always=True)
2324
def validate_id(cls, v, values):

labelbox/schema/bulk_import_request.py

Lines changed: 118 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,14 @@
99
import backoff
1010
import ndjson
1111
import requests
12-
from pydantic import BaseModel, validator
12+
from pydantic import BaseModel, root_validator, validator
1313
from typing_extensions import Literal
1414
from typing import (Any, List, Optional, BinaryIO, Dict, Iterable, Tuple, Union,
1515
Type, Set, TYPE_CHECKING)
1616

1717
from labelbox import exceptions as lb_exceptions
18+
from labelbox.data.annotation_types.types import Cuid
19+
from labelbox.data.ontology import get_feature_schema_lookup
1820
from labelbox.orm.model import Entity
1921
from labelbox import utils
2022
from labelbox.orm import query
@@ -408,12 +410,14 @@ def _validate_ndjson(lines: Iterable[Dict[str, Any]],
408410
MALValidationError: Raise for invalid NDJson
409411
UuidError: Duplicate UUID in upload
410412
"""
411-
feature_schemas = get_mal_schemas(project.ontology())
413+
feature_schemas_by_id, feature_schemas_by_name = get_mal_schemas(
414+
project.ontology())
412415
uids: Set[str] = set()
413416
for idx, line in enumerate(lines):
414417
try:
415418
annotation = NDAnnotation(**line)
416-
annotation.validate_instance(feature_schemas)
419+
annotation.validate_instance(feature_schemas_by_id,
420+
feature_schemas_by_name)
417421
uuid = str(annotation.uuid)
418422
if uuid in uids:
419423
raise lb_exceptions.UuidError(
@@ -437,14 +441,18 @@ def parse_classification(tool):
437441
dict
438442
"""
439443
if tool['type'] in ['radio', 'checklist']:
444+
option_schema_ids = [r['featureSchemaId'] for r in tool['options']]
445+
option_names = [r['value'] for r in tool['options']]
440446
return {
441447
'tool': tool['type'],
442448
'featureSchemaId': tool['featureSchemaId'],
443-
'options': [r['featureSchemaId'] for r in tool['options']]
449+
'name': tool['name'],
450+
'options': [*option_schema_ids, *option_names]
444451
}
445452
elif tool['type'] == 'text':
446453
return {
447454
'tool': tool['type'],
455+
'name': tool['name'],
448456
'featureSchemaId': tool['featureSchemaId']
449457
}
450458

@@ -456,24 +464,37 @@ def get_mal_schemas(ontology):
456464
Args:
457465
ontology (Ontology)
458466
Returns:
459-
Dict : Useful for looking up a tool from a given feature schema id
467+
Dict, Dict : Useful for looking up a tool from a given feature schema id or name
460468
"""
461469

462-
valid_feature_schemas = {}
470+
valid_feature_schemas_by_schema_id = {}
471+
valid_feature_schemas_by_name = {}
463472
for tool in ontology.normalized['tools']:
464473
classifications = [
465474
parse_classification(classification_tool)
466475
for classification_tool in tool['classifications']
467476
]
468-
classifications = {v['featureSchemaId']: v for v in classifications}
469-
valid_feature_schemas[tool['featureSchemaId']] = {
477+
classifications_by_schema_id = {
478+
v['featureSchemaId']: v for v in classifications
479+
}
480+
classifications_by_name = {v['name']: v for v in classifications}
481+
valid_feature_schemas_by_schema_id[tool['featureSchemaId']] = {
482+
'tool': tool['tool'],
483+
'classificationsBySchemaId': classifications_by_schema_id,
484+
'classificationsByName': classifications_by_name,
485+
'name': tool['name']
486+
}
487+
valid_feature_schemas_by_name[tool['name']] = {
470488
'tool': tool['tool'],
471-
'classifications': classifications
489+
'classificationsBySchemaId': classifications_by_schema_id,
490+
'classificationsByName': classifications_by_name,
491+
'name': tool['name']
472492
}
473493
for tool in ontology.normalized['classifications']:
474-
valid_feature_schemas[tool['featureSchemaId']] = parse_classification(
475-
tool)
476-
return valid_feature_schemas
494+
valid_feature_schemas_by_schema_id[
495+
tool['featureSchemaId']] = parse_classification(tool)
496+
valid_feature_schemas_by_name[tool['name']] = parse_classification(tool)
497+
return valid_feature_schemas_by_schema_id, valid_feature_schemas_by_name
477498

478499

479500
LabelboxID: str = pydantic.Field(..., min_length=25, max_length=25)
@@ -585,27 +606,52 @@ class DataRow(BaseModel):
585606

586607

587608
class NDFeatureSchema(BaseModel):
    """Reference to an ontology feature, given by schema id, by name, or both."""
    # Feature schema id of the referenced ontology entry, if given.
    schemaId: Optional[Cuid] = None
    # Name of the referenced ontology entry, if given.
    name: Optional[str] = None

    @root_validator
    def must_set_one(cls, values):
        """Require at least one of ``schemaId`` or ``name``.

        Args:
            values: Field values that passed their individual validation.

        Returns:
            The unchanged ``values`` dict.

        Raises:
            ValueError: If neither ``schemaId`` nor ``name`` holds a value.
        """
        # A field that failed its own validation is absent from `values`, so
        # indexing would raise a bare KeyError instead of a validation error.
        if values.get('schemaId') is None and values.get('name') is None:
            raise ValueError(
                "Must set either schemaId or name for all feature schemas")
        return values
589618

590619

591620
class NDBase(NDFeatureSchema):
592621
ontology_type: str
593622
uuid: UUID
594623
dataRow: DataRow
595624

596-
def validate_feature_schemas(self, valid_feature_schemas_by_id,
                             valid_feature_schemas_by_name):
    """Check this annotation's name and/or schema id against the ontology.

    Each identifier that is set must exist in its lookup and must map to
    a tool equal to ``self.ontology_type``.

    Args:
        valid_feature_schemas_by_id: Lookup from feature schema id to a dict
            that has at least a ``'tool'`` entry.
        valid_feature_schemas_by_name: Lookup from feature name to a dict
            that has at least a ``'tool'`` entry.

    Raises:
        ValueError: If neither identifier is usable, if an identifier is
            unknown to the ontology, or if it maps to a different tool.
    """
    # Both checks below are guarded by truthiness, so an empty name with no
    # schema id would otherwise pass without being checked at all.
    if not self.name and not self.schemaId:
        raise ValueError(
            "Must set either schemaId or name for all feature schemas")

    if self.name:
        if self.name not in valid_feature_schemas_by_name:
            raise ValueError(
                f"name {self.name} is not valid for the provided project's ontology."
            )

        if self.ontology_type != valid_feature_schemas_by_name[
                self.name]['tool']:
            raise ValueError(
                f"Name {self.name} does not map to the assigned tool {valid_feature_schemas_by_name[self.name]['tool']}"
            )

    if self.schemaId:
        if self.schemaId not in valid_feature_schemas_by_id:
            raise ValueError(
                f"schema id {self.schemaId} is not valid for the provided project's ontology."
            )

        if self.ontology_type != valid_feature_schemas_by_id[
                self.schemaId]['tool']:
            raise ValueError(
                f"Schema id {self.schemaId} does not map to the assigned tool {valid_feature_schemas_by_id[self.schemaId]['tool']}"
            )
650+
651+
def validate_instance(self, valid_feature_schemas_by_id,
                      valid_feature_schemas_by_name):
    """Validate this annotation against the ontology lookups.

    Delegates to ``validate_feature_schemas`` with both lookups.

    Args:
        valid_feature_schemas_by_id: Ontology lookup keyed by feature schema id.
        valid_feature_schemas_by_name: Ontology lookup keyed by feature name.
    """
    lookups = (valid_feature_schemas_by_id, valid_feature_schemas_by_name)
    self.validate_feature_schemas(*lookups)
609655

610656
class Config:
611657
#Users shouldn't add extra data to the payload
@@ -629,9 +675,20 @@ class NDText(NDBase):
629675
#No feature schema to check
630676

631677

678+
class NDAnswer(BaseModel):
    """Selected option of a classification, given by schema id, by value, or both."""
    # Feature schema id of the selected option, if given.
    schemaId: Optional[Cuid] = None
    # Value string of the selected option, if given.
    value: Optional[str] = None

    @root_validator
    def must_set_one(cls, values):
        """Require at least one of ``schemaId`` or ``value``.

        Args:
            values: Field values that passed their individual validation.

        Returns:
            The unchanged ``values`` dict.

        Raises:
            ValueError: If neither ``schemaId`` nor ``value`` holds a value.
        """
        # A field that failed its own validation is absent from `values`, so
        # indexing would raise a bare KeyError instead of a validation error.
        if values.get('schemaId') is None and values.get('value') is None:
            raise ValueError("Must set either schemaId or value for answers")
        return values
687+
688+
632689
class NDChecklist(VideoSupported, NDBase):
633690
ontology_type: Literal["checklist"] = "checklist"
634-
answers: List[NDFeatureSchema] = pydantic.Field(determinant=True)
691+
answers: List[NDAnswer] = pydantic.Field(determinant=True)
635692

636693
@validator('answers', pre=True)
637694
def validate_answers(cls, value, field):
@@ -640,32 +697,43 @@ def validate_answers(cls, value, field):
640697
raise ValueError("Checklist answers should not be empty")
641698
return value
642699

643-
def validate_feature_schemas(self, valid_feature_schemas_by_id,
                             valid_feature_schemas_by_name):
    """Validate the checklist and each of its answers against the ontology.

    Args:
        valid_feature_schemas_by_id: Ontology lookup keyed by feature schema
            id; the entry for this checklist must have an ``'options'`` list.
        valid_feature_schemas_by_name: Ontology lookup keyed by feature name;
            the entry for this checklist must have an ``'options'`` list.

    Raises:
        ValueError: If the top-level schema is invalid, if two answers share
            the same key, or if an answer is not one of the allowed options.
    """
    #Test top level feature schema for this tool
    super(NDChecklist, self).validate_feature_schemas(
        valid_feature_schemas_by_id, valid_feature_schemas_by_name)

    #Test the feature schemas provided to the answer field
    answer_keys = {answer.value or answer.schemaId for answer in self.answers}
    if len(answer_keys) != len(self.answers):
        raise ValueError(
            f"Duplicated featureSchema found for checklist {self.uuid}")

    # The allowed options depend only on the checklist itself, so resolve
    # them once instead of once per answer.
    if self.name:
        options = valid_feature_schemas_by_name[self.name]['options']
    else:
        options = valid_feature_schemas_by_id[self.schemaId]['options']

    for answer in self.answers:
        # An answer is accepted when either its value or its schema id is
        # among the allowed options.
        if answer.value not in options and answer.schemaId not in options:
            raise ValueError(
                f"Feature schema provided to {self.ontology_type} invalid. Expected on of {options}. Found {answer}"
            )
657720

658721

659722
class NDRadio(VideoSupported, NDBase):
    """Radio classification annotation carrying a single answer."""
    ontology_type: Literal["radio"] = "radio"
    answer: NDAnswer = pydantic.Field(determinant=True)

    def validate_feature_schemas(self, valid_feature_schemas_by_id,
                                 valid_feature_schemas_by_name):
        """Validate the radio and its answer against the ontology.

        Args:
            valid_feature_schemas_by_id: Ontology lookup keyed by feature
                schema id.
            valid_feature_schemas_by_name: Ontology lookup keyed by feature
                name.

        Raises:
            ValueError: If the top-level schema is invalid, or if neither the
                answer's value nor its schema id is an allowed option.
        """
        super(NDRadio, self).validate_feature_schemas(
            valid_feature_schemas_by_id, valid_feature_schemas_by_name)

        # Prefer the name lookup when a name is set; otherwise use the id.
        if self.name:
            options = valid_feature_schemas_by_name[self.name]['options']
        else:
            options = valid_feature_schemas_by_id[self.schemaId]['options']

        selected = self.answer
        if selected.value in options or selected.schemaId in options:
            return
        raise ValueError(
            f"Feature schema provided to {self.ontology_type} invalid. Expected on of {options}. Found {self.answer.value or self.answer.schemaId}"
        )
670738

671739

@@ -684,11 +752,20 @@ class NDBaseTool(NDBase):
684752
classifications: List[NDClassification] = []
685753

686754
#This is independent of our problem
687-
def validate_feature_schemas(self, valid_feature_schemas_by_id,
                             valid_feature_schemas_by_name):
    """Validate the tool and each nested classification against the ontology.

    Args:
        valid_feature_schemas_by_id: Ontology lookup keyed by feature schema
            id; each tool entry holds ``'classificationsBySchemaId'`` and
            ``'classificationsByName'``.
        valid_feature_schemas_by_name: Ontology lookup keyed by feature name,
            with the same per-tool entries.
    """
    super(NDBaseTool, self).validate_feature_schemas(
        valid_feature_schemas_by_id, valid_feature_schemas_by_name)

    for classification in self.classifications:
        # Find this tool's ontology entry by name when a name is set,
        # otherwise by schema id.
        if self.name:
            tool_schema = valid_feature_schemas_by_name[self.name]
        else:
            tool_schema = valid_feature_schemas_by_id[self.schemaId]
        # Nested classifications are validated against this tool's own
        # classification lookups, not the project-wide ones.
        classification.validate_feature_schemas(
            tool_schema['classificationsBySchemaId'],
            tool_schema['classificationsByName'])
692769

693770
@validator('classifications', pre=True)
694771
def validate_subclasses(cls, value, field):

tests/integration/annotation_import/conftest.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ def prediction_id_mapping(configured_project):
135135
result[tool_type] = {
136136
"uuid": str(uuid.uuid4()),
137137
"schemaId": tool['featureSchemaId'],
138+
"name": tool['name'],
138139
"dataRow": {
139140
"id": configured_project.data_row_ids[idx],
140141
},
@@ -178,10 +179,15 @@ def rectangle_inference(prediction_id_mapping):
178179
'classifications': [{
179180
"schemaId":
180181
rectangle['tool']['classifications'][0]['featureSchemaId'],
182+
"name":
183+
rectangle['tool']['classifications'][0]['name'],
181184
"answer": {
182185
"schemaId":
183186
rectangle['tool']['classifications'][0]['options'][0]
184-
['featureSchemaId']
187+
['featureSchemaId'],
188+
"value":
189+
rectangle['tool']['classifications'][0]['options'][0]
190+
['value']
185191
}
186192
}]
187193
})

tests/integration/annotation_import/test_ndjson_validation.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,10 @@ def test_invalid_checklist_item(checklist_inference, configured_project):
8282
with pytest.raises(MALValidationError):
8383
_validate_ndjson([pred], configured_project)
8484

85+
pred['answers'] = [{"name": "asdfg"}]
86+
with pytest.raises(MALValidationError):
87+
_validate_ndjson([pred], configured_project)
88+
8589
pred['answers'] = [{"schemaId": "1232132132"}]
8690
with pytest.raises(MALValidationError):
8791
_validate_ndjson([pred], configured_project)
@@ -177,10 +181,25 @@ def test_invalid_feature_schema(configured_project, rectangle_inference):
177181
_validate_ndjson([pred], configured_project)
178182

179183

184+
def test_name_only_feature_schema(configured_project, rectangle_inference):
185+
#Uploading with only a name (no schemaId) should pass validation
186+
pred = rectangle_inference.copy()
187+
del pred['schemaId']
188+
_validate_ndjson([pred], configured_project)
189+
190+
191+
def test_schema_id_only_feature_schema(configured_project, rectangle_inference):
192+
#Uploading with only a schemaId (no name) should pass validation
193+
pred = rectangle_inference.copy()
194+
del pred['name']
195+
_validate_ndjson([pred], configured_project)
196+
197+
180198
def test_missing_feature_schema(configured_project, rectangle_inference):
181199
#Removing both schemaId and name should fail validation
182200
pred = rectangle_inference.copy()
183201
del pred['schemaId']
202+
del pred['name']
184203
with pytest.raises(MALValidationError):
185204
_validate_ndjson([pred], configured_project)
186205

0 commit comments

Comments
 (0)