1010from typing import Tuple
1111from typing import Union
1212from uuid import UUID
13+ from pydantic import BaseModel , validator
1314
1415import backoff
1516import ndjson
@@ -243,7 +244,7 @@ def create_from_objects(cls, client, project_id: str, name: str,
243244 }
244245 }``
245246
246- Args:
247+ Args:
247248 client (Client): a Labelbox client
248249 project_id (str): id of project for which predictions will be imported
249250 name (str): name of BulkImportRequest
@@ -315,26 +316,8 @@ def create_from_local_file(cls,
315316 return cls (client , response_data ["createBulkImportRequest" ])
316317
317318
318- """
319- #Outstanding questions:
320-
321- * How to check data row media type?
322- * Video
323- - annotations without frames indices wouldn't be flagged right now
324- * Everything else
325- - We won't know if a text tool is being used for video.
326- - Or a tool only support for images is being used for video
327- ... etc
328-
329- - video only supports radio and checklist tools.
330- - This would be good to validate here.
331-
332- * While this is a pretty decent check it isn't going to be 100% since we aren't examining the actual data rows.
333- * Eg entity recognition we can't check if the index is greater than the doc length.
334319
335320
336- """
337-
338321def _validate_uuids (lines : Iterable [Dict [str , Any ]]) -> None :
339322 """Validate individual ndjson lines.
340323 - verifies that uuids are unique
@@ -348,6 +331,9 @@ def _validate_uuids(lines: Iterable[Dict[str, Any]]) -> None:
348331 'must be unique for the project.' )
349332 uuids .add (uuid )
350333
334+
335+
336+
351337def parse_classification (tool ):
352338 """
353339 Only radio, checklist, and text are supported for mal
@@ -371,15 +357,9 @@ def get_valid_feature_schemas(project):
371357 return valid_feature_schemas
372358
373359
374- from pydantic import BaseModel , validator
375- #Note that pydantic is a bit strict. It could break workflows of people who throw crap into the payload
376- #Maybe set validate to false by default?
377360
378- #How do I add subclasses?
379- #What is valid?
380- #Here is your answer :)
381361
382- LabelboxID = str #todo
362+ LabelboxID = constr ( min_length = 25 , max_length = 25 , strict = True )
383363
384364#TODO: Is this defined elsewhere?
385365class Bbox (TypedDict ):
@@ -399,24 +379,27 @@ class Feature(BaseModel):
399379 schemaId : LabelboxID
400380
401381 class Config :
402- #We don 't want them to add extra stuff to the payload
382+ #Users shouldn't add extra data to the payload
403383 extra = 'forbid'
404384
405-
406385#Do these classes need to support uuids?
407386class Text (Feature ):
408387 ontology_type : str = "text"
409388 answer : str
410389
411- class CheckList (Feature ):
390+ class VideoSupported (Feature ):
391+ #Note that frames are only allowed as top level inferences for video
392+ frames : Optional [List [TypedDict ("frames" , {"end" : int , "start" : int })]]
393+
394+
395+ class CheckList (VideoSupported ):
412396 ontology_type : str = "checklist"
413397 answers : conlist (TypedDict ('schemaId' , {'schemaId' : LabelboxID }), min_items = 1 )
414398
415- class Radio (Feature ):
399+ class Radio (VideoSupported ):
416400 ontology_type : str = "radio"
417401 answer : TypedDict ('schemaId' , {'schemaId' : LabelboxID })
418402
419-
420403class Tool (Feature ):
421404 classifications : List [Union [CheckList , Text , Radio ]] = []
422405
@@ -479,6 +462,8 @@ def is_valid_mask(cls, v):
479462 raise ValueError (f"All rgb colors must be between 0 and 255. Found : { colors } " )
480463 return v
481464
465+
466+
482467class Annotation (BaseModel ):
483468 uuid : UUID
484469 dataRow : TypedDict ('dataRow' , {'id' : LabelboxID })
@@ -525,6 +510,14 @@ def validate(self, valid_datarows, valid_feature_schemas):
525510
526511
527512def _validate_ndjson (lines : Iterable [Dict [str , Any ]], project ) -> None :
513+ """
514+ Notes:
515+ - Validation doesn't check data row data types.
516+ This means we don't check to make sure that the annotation is valid for a particular data type.
517+ - video only supports radio and checklist tools and requires frame indices which we don't check for.
518+ - We also forbid extra fields in the payload (extra = 'forbid'), which might be too strict.
519+ - We also aren't checking bounds of the assets (eg frame index, image height, text length)
520+ """
528521 data_row_ids = {data_row .uid : data_row for dataset in project .datasets () for data_row in dataset .data_rows ()}
529522 feature_schemas = get_valid_feature_schemas (project )
530523 uids = set ()
@@ -542,5 +535,5 @@ def _validate_ndjson(lines: Iterable[Dict[str, Any]], project) -> None:
542535 except (ValidationError , ValueError ) as e :
543536 raise labelbox .exceptions .NDJsonError (f"Invalid NDJson on line { idx } " ) from e
544537
545-
546538
539+
0 commit comments