Skip to content

Commit 7a787dc

Browse files
Author: Matt Sokoloff — committed: "added video checking"
1 parent c3bbcf5 commit 7a787dc

File tree

4 files changed

+72
-41
lines changed

4 files changed

+72
-41
lines changed

Dockerfile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,11 @@ FROM python:3.7
22

33
RUN pip install pytest
44

5-
COPY . /usr/src/labelbox
5+
66
WORKDIR /usr/src/labelbox
7+
COPY requirements.txt /usr/src/labelbox
8+
RUN pip install -r requirements.txt
9+
COPY . /usr/src/labelbox
10+
711

812
RUN python setup.py install

labelbox/schema/bulk_import_request.py

Lines changed: 25 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from typing import Tuple
1111
from typing import Union
1212
from uuid import UUID
13+
from pydantic import BaseModel, validator
1314

1415
import backoff
1516
import ndjson
@@ -243,7 +244,7 @@ def create_from_objects(cls, client, project_id: str, name: str,
243244
}
244245
}``
245246
246-
Args:
247+
Args:x
247248
client (Client): a Labelbox client
248249
project_id (str): id of project for which predictions will be imported
249250
name (str): name of BulkImportRequest
@@ -315,26 +316,8 @@ def create_from_local_file(cls,
315316
return cls(client, response_data["createBulkImportRequest"])
316317

317318

318-
"""
319-
#Outstanding questions:
320-
321-
* How to check data row media type?
322-
* Video
323-
- annotations without frames indices wouldn't be flagged right now
324-
* Everything else
325-
- We won't know if a text tool is being used for video.
326-
- Or a tool only support for images is being used for video
327-
... etc
328-
329-
- video only supports radio and checklist tools.
330-
- This would be good to validate here.
331-
332-
* While this is a pretty decent check it isn't going to be 100% since we aren't examining the actual data rows.
333-
* Eg entity recognition we can't check if the index is greater than the doc length.
334319

335320

336-
"""
337-
338321
def _validate_uuids(lines: Iterable[Dict[str, Any]]) -> None:
339322
"""Validate individual ndjson lines.
340323
- verifies that uuids are unique
@@ -348,6 +331,9 @@ def _validate_uuids(lines: Iterable[Dict[str, Any]]) -> None:
348331
'must be unique for the project.')
349332
uuids.add(uuid)
350333

334+
335+
336+
351337
def parse_classification(tool):
352338
"""
353339
Only radio, checklist, and text are supported for mal
@@ -371,15 +357,9 @@ def get_valid_feature_schemas(project):
371357
return valid_feature_schemas
372358

373359

374-
from pydantic import BaseModel, validator
375-
#Note that pydantic is a bit strict. It could break workflows of people who throw crap into the payload
376-
#Maybe set validate to false by default?
377360

378-
#How do I add subclasses?
379-
#What is valid?
380-
#Here is your answer :)
381361

382-
LabelboxID = str #todo
362+
LabelboxID = constr(min_length=25, max_length=25, strict=True)
383363

384364
#TODO: Is this defined elsewhere?
385365
class Bbox(TypedDict):
@@ -399,24 +379,27 @@ class Feature(BaseModel):
399379
schemaId: LabelboxID
400380

401381
class Config:
402-
#We don't want them to add extra stuff to the payload
382+
#Users shouldn't to add extra data to the payload
403383
extra = 'forbid'
404384

405-
406385
#Do this classes need to support uuids?
407386
class Text(Feature):
408387
ontology_type: str = "text"
409388
answer: str
410389

411-
class CheckList(Feature):
390+
class VideoSupported(Feature):
391+
#Note that frames are only allowed as top level inferences for video
392+
frames : Optional[List[TypedDict("frames", {"end" : int, "start" : int})]]
393+
394+
395+
class CheckList(VideoSupported):
412396
ontology_type: str = "checklist"
413397
answers: conlist(TypedDict('schemaId', {'schemaId': LabelboxID}), min_items = 1)
414398

415-
class Radio(Feature):
399+
class Radio(VideoSupported):
416400
ontology_type: str = "radio"
417401
answer: TypedDict('schemaId' , {'schemaId': LabelboxID})
418402

419-
420403
class Tool(Feature):
421404
classifications : List[Union[CheckList, Text, Radio]] = []
422405

@@ -479,6 +462,8 @@ def is_valid_mask(cls, v):
479462
raise ValueError(f"All rgb colors must be between 0 and 255. Found : {colors}")
480463
return v
481464

465+
466+
482467
class Annotation(BaseModel):
483468
uuid: UUID
484469
dataRow: TypedDict('dataRow' , {'id' : LabelboxID})
@@ -525,6 +510,14 @@ def validate(self, valid_datarows, valid_feature_schemas):
525510

526511

527512
def _validate_ndjson(lines: Iterable[Dict[str, Any]], project) -> None:
513+
"""
514+
Notes:
515+
- Validation doesn't check data row data types.
516+
This means we don't check to make sure that the annotation is valid for a particular data type.
517+
- video only supports radio and checklist tools and requires frame indices which we don't check for.
518+
- We also forbid extra so that might be too strict...
519+
- We also aren't checking bounds of the assets (eg frame index, image height, text length)
520+
"""
528521
data_row_ids = {data_row.uid : data_row for dataset in project.datasets() for data_row in dataset.data_rows()}
529522
feature_schemas = get_valid_feature_schemas(project)
530523
uids = set()
@@ -542,5 +535,5 @@ def _validate_ndjson(lines: Iterable[Dict[str, Any]], project) -> None:
542535
except (ValidationError, ValueError) as e:
543536
raise labelbox.exceptions.NDJsonError(f"Invalid NDJson on line {idx}") from e
544537

545-
546538

539+

tests/integration/conftest.py

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ def prediction_id_mapping(configured_project):
196196

197197
@pytest.fixture
198198
def polygon_inference(prediction_id_mapping):
199-
polygon = prediction_id_mapping['polygon']
199+
polygon = prediction_id_mapping['polygon'].copy()
200200
polygon.update(
201201
{
202202
"polygon": [{
@@ -219,7 +219,7 @@ def polygon_inference(prediction_id_mapping):
219219

220220
@pytest.fixture
221221
def rectangle_inference(prediction_id_mapping):
222-
rectangle = prediction_id_mapping['rectangle']
222+
rectangle = prediction_id_mapping['rectangle'].copy()
223223
rectangle.update( {"bbox": {
224224
"top": 48,
225225
"left": 58,
@@ -236,7 +236,7 @@ def rectangle_inference(prediction_id_mapping):
236236

237237
@pytest.fixture
238238
def line_inference(prediction_id_mapping):
239-
line = prediction_id_mapping['line']
239+
line = prediction_id_mapping['line'].copy()
240240
line.update({
241241
"line": [{
242242
"x": 147.692,
@@ -252,7 +252,7 @@ def line_inference(prediction_id_mapping):
252252

253253
@pytest.fixture
254254
def point_inference(prediction_id_mapping):
255-
point = prediction_id_mapping['point']
255+
point = prediction_id_mapping['point'].copy()
256256
point.update({
257257
"point": {
258258
"x": 147.692,
@@ -264,7 +264,7 @@ def point_inference(prediction_id_mapping):
264264

265265
@pytest.fixture
266266
def entity_inference(prediction_id_mapping):
267-
entity = prediction_id_mapping['named-entity']
267+
entity = prediction_id_mapping['named-entity'].copy()
268268
entity.update({"location" : {
269269
"start" : 67,
270270
"end" : 128
@@ -274,7 +274,7 @@ def entity_inference(prediction_id_mapping):
274274

275275
@pytest.fixture
276276
def segmentation_inference(prediction_id_mapping):
277-
segmentation = prediction_id_mapping['superpixel']
277+
segmentation = prediction_id_mapping['superpixel'].copy()
278278
segmentation.update({'mask' : {
279279
'instanceURI' : "sampleuri",
280280
'colorRGB' : [0,0,0]
@@ -285,7 +285,7 @@ def segmentation_inference(prediction_id_mapping):
285285

286286
@pytest.fixture
287287
def checklist_inference(prediction_id_mapping):
288-
checklist = prediction_id_mapping['checklist']
288+
checklist = prediction_id_mapping['checklist'].copy()
289289
checklist.update({'answers' : [
290290
{
291291
'schemaId' : checklist['tool']['options'][0]['featureSchemaId']
@@ -297,14 +297,44 @@ def checklist_inference(prediction_id_mapping):
297297

298298
@pytest.fixture
299299
def text_inference(prediction_id_mapping):
300-
text = prediction_id_mapping['text']
300+
text = prediction_id_mapping['text'].copy()
301301
text.update({
302302
'answer' : "free form text..."
303303

304304
})
305305
del text['tool']
306306
return text
307307

308+
309+
@pytest.fixture
310+
def video_checklist_inference(prediction_id_mapping):
311+
checklist = prediction_id_mapping['checklist'].copy()
312+
checklist.update({'answers' : [
313+
{
314+
'schemaId' : checklist['tool']['options'][0]['featureSchemaId']
315+
}
316+
]
317+
})
318+
319+
checklist.update({
320+
"frames": [
321+
{
322+
"start": 7,
323+
"end": 13,
324+
},
325+
{
326+
"start": 18,
327+
"end": 19,
328+
}
329+
]})
330+
del checklist['tool']
331+
return checklist
332+
333+
334+
335+
336+
337+
308338
@pytest.fixture
309339
def predictions(polygon_inference,
310340
rectangle_inference,

tests/integration/test_ndjon_validation.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,3 +167,7 @@ def test_validate_ndjson_uuid(tmp_path, configured_project, predictions):
167167
with pytest.raises(NDJsonError):
168168
configured_project.upload_annotations(name="name", annotations=repeat_uuid)
169169

170+
171+
def test_video_upload(video_checklist_inference, configured_project):
172+
pred = video_checklist_inference.copy()
173+
_validate_ndjson([pred], configured_project)

0 commit comments

Comments (0)