Skip to content

Commit 7a787dc

Browse files
Author: Matt Sokoloff — committed: "added video checking"
1 parent c3bbcf5 commit 7a787dc

File tree

4 files changed

+72
-41
lines changed

4 files changed

+72
-41
lines changed

Dockerfile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,11 @@ FROM python:3.7
22

33
RUN pip install pytest
44

5-
COPY . /usr/src/labelbox
5+
66
WORKDIR /usr/src/labelbox
7+
COPY requirements.txt /usr/src/labelbox
8+
RUN pip install -r requirements.txt
9+
COPY . /usr/src/labelbox
10+
711

812
RUN python setup.py install

labelbox/schema/bulk_import_request.py

Lines changed: 25 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from typing import Tuple
1111
from typing import Union
1212
from uuid import UUID
13+
from pydantic import BaseModel, validator
1314

1415
import backoff
1516
import ndjson
@@ -243,7 +244,7 @@ def create_from_objects(cls, client, project_id: str, name: str,
243244
}
244245
}``
245246
246-
Args:
247+
Args:x
247248
client (Client): a Labelbox client
248249
project_id (str): id of project for which predictions will be imported
249250
name (str): name of BulkImportRequest
@@ -315,26 +316,8 @@ def create_from_local_file(cls,
315316
return cls(client, response_data["createBulkImportRequest"])
316317

317318

318-
"""
319-
#Outstanding questions:
320-
321-
* How to check data row media type?
322-
* Video
323-
- annotations without frames indices wouldn't be flagged right now
324-
* Everything else
325-
- We won't know if a text tool is being used for video.
326-
- Or a tool only support for images is being used for video
327-
... etc
328-
329-
- video only supports radio and checklist tools.
330-
- This would be good to validate here.
331-
332-
* While this is a pretty decent check it isn't going to be 100% since we aren't examining the actual data rows.
333-
* Eg entity recognition we can't check if the index is greater than the doc length.
334319

335320

336-
"""
337-
338321
def _validate_uuids(lines: Iterable[Dict[str, Any]]) -> None:
339322
"""Validate individual ndjson lines.
340323
- verifies that uuids are unique
@@ -348,6 +331,9 @@ def _validate_uuids(lines: Iterable[Dict[str, Any]]) -> None:
348331
'must be unique for the project.')
349332
uuids.add(uuid)
350333

334+
335+
336+
351337
def parse_classification(tool):
352338
"""
353339
Only radio, checklist, and text are supported for mal
@@ -371,15 +357,9 @@ def get_valid_feature_schemas(project):
371357
return valid_feature_schemas
372358

373359

374-
from pydantic import BaseModel, validator
375-
#Note that pydantic is a bit strict. It could break workflows of people who throw crap into the payload
376-
#Maybe set validate to false by default?
377360

378-
#How do I add subclasses?
379-
#What is valid?
380-
#Here is your answer :)
381361

382-
LabelboxID = str #todo
362+
LabelboxID = constr(min_length=25, max_length=25, strict=True)
383363

384364
#TODO: Is this defined elsewhere?
385365
class Bbox(TypedDict):
@@ -399,24 +379,27 @@ class Feature(BaseModel):
399379
schemaId: LabelboxID
400380

401381
class Config:
402-
#We don't want them to add extra stuff to the payload
382+
#Users shouldn't to add extra data to the payload
403383
extra = 'forbid'
404384

405-
406385
#Do this classes need to support uuids?
407386
class Text(Feature):
408387
ontology_type: str = "text"
409388
answer: str
410389

411-
class CheckList(Feature):
390+
class VideoSupported(Feature):
391+
#Note that frames are only allowed as top level inferences for video
392+
frames : Optional[List[TypedDict("frames", {"end" : int, "start" : int})]]
393+
394+
395+
class CheckList(VideoSupported):
412396
ontology_type: str = "checklist"
413397
answers: conlist(TypedDict('schemaId', {'schemaId': LabelboxID}), min_items = 1)
414398

415-
class Radio(Feature):
399+
class Radio(VideoSupported):
416400
ontology_type: str = "radio"
417401
answer: TypedDict('schemaId' , {'schemaId': LabelboxID})
418402

419-
420403
class Tool(Feature):
421404
classifications : List[Union[CheckList, Text, Radio]] = []
422405

@@ -479,6 +462,8 @@ def is_valid_mask(cls, v):
479462
raise ValueError(f"All rgb colors must be between 0 and 255. Found : {colors}")
480463
return v
481464

465+
466+
482467
class Annotation(BaseModel):
483468
uuid: UUID
484469
dataRow: TypedDict('dataRow' , {'id' : LabelboxID})
@@ -525,6 +510,14 @@ def validate(self, valid_datarows, valid_feature_schemas):
525510

526511

527512
def _validate_ndjson(lines: Iterable[Dict[str, Any]], project) -> None:
513+
"""
514+
Notes:
515+
- Validation doesn't check data row data types.
516+
This means we don't check to make sure that the annotation is valid for a particular data type.
517+
- video only supports radio and checklist tools and requires frame indices which we don't check for.
518+
- We also forbid extra so that might be too strict...
519+
- We also aren't checking bounds of the assets (eg frame index, image height, text length)
520+
"""
528521
data_row_ids = {data_row.uid : data_row for dataset in project.datasets() for data_row in dataset.data_rows()}
529522
feature_schemas = get_valid_feature_schemas(project)
530523
uids = set()
@@ -542,5 +535,5 @@ def _validate_ndjson(lines: Iterable[Dict[str, Any]], project) -> None:
542535
except (ValidationError, ValueError) as e:
543536
raise labelbox.exceptions.NDJsonError(f"Invalid NDJson on line {idx}") from e
544537

545-
546538

539+

tests/integration/conftest.py

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ def prediction_id_mapping(configured_project):
196196

197197
@pytest.fixture
198198
def polygon_inference(prediction_id_mapping):
199-
polygon = prediction_id_mapping['polygon']
199+
polygon = prediction_id_mapping['polygon'].copy()
200200
polygon.update(
201201
{
202202
"polygon": [{
@@ -219,7 +219,7 @@ def polygon_inference(prediction_id_mapping):
219219

220220
@pytest.fixture
221221
def rectangle_inference(prediction_id_mapping):
222-
rectangle = prediction_id_mapping['rectangle']
222+
rectangle = prediction_id_mapping['rectangle'].copy()
223223
rectangle.update( {"bbox": {
224224
"top": 48,
225225
"left": 58,
@@ -236,7 +236,7 @@ def rectangle_inference(prediction_id_mapping):
236236

237237
@pytest.fixture
238238
def line_inference(prediction_id_mapping):
239-
line = prediction_id_mapping['line']
239+
line = prediction_id_mapping['line'].copy()
240240
line.update({
241241
"line": [{
242242
"x": 147.692,
@@ -252,7 +252,7 @@ def line_inference(prediction_id_mapping):
252252

253253
@pytest.fixture
254254
def point_inference(prediction_id_mapping):
255-
point = prediction_id_mapping['point']
255+
point = prediction_id_mapping['point'].copy()
256256
point.update({
257257
"point": {
258258
"x": 147.692,
@@ -264,7 +264,7 @@ def point_inference(prediction_id_mapping):
264264

265265
@pytest.fixture
266266
def entity_inference(prediction_id_mapping):
267-
entity = prediction_id_mapping['named-entity']
267+
entity = prediction_id_mapping['named-entity'].copy()
268268
entity.update({"location" : {
269269
"start" : 67,
270270
"end" : 128
@@ -274,7 +274,7 @@ def entity_inference(prediction_id_mapping):
274274

275275
@pytest.fixture
276276
def segmentation_inference(prediction_id_mapping):
277-
segmentation = prediction_id_mapping['superpixel']
277+
segmentation = prediction_id_mapping['superpixel'].copy()
278278
segmentation.update({'mask' : {
279279
'instanceURI' : "sampleuri",
280280
'colorRGB' : [0,0,0]
@@ -285,7 +285,7 @@ def segmentation_inference(prediction_id_mapping):
285285

286286
@pytest.fixture
287287
def checklist_inference(prediction_id_mapping):
288-
checklist = prediction_id_mapping['checklist']
288+
checklist = prediction_id_mapping['checklist'].copy()
289289
checklist.update({'answers' : [
290290
{
291291
'schemaId' : checklist['tool']['options'][0]['featureSchemaId']
@@ -297,14 +297,44 @@ def checklist_inference(prediction_id_mapping):
297297

298298
@pytest.fixture
299299
def text_inference(prediction_id_mapping):
300-
text = prediction_id_mapping['text']
300+
text = prediction_id_mapping['text'].copy()
301301
text.update({
302302
'answer' : "free form text..."
303303

304304
})
305305
del text['tool']
306306
return text
307307

308+
309+
@pytest.fixture
310+
def video_checklist_inference(prediction_id_mapping):
311+
checklist = prediction_id_mapping['checklist'].copy()
312+
checklist.update({'answers' : [
313+
{
314+
'schemaId' : checklist['tool']['options'][0]['featureSchemaId']
315+
}
316+
]
317+
})
318+
319+
checklist.update({
320+
"frames": [
321+
{
322+
"start": 7,
323+
"end": 13,
324+
},
325+
{
326+
"start": 18,
327+
"end": 19,
328+
}
329+
]})
330+
del checklist['tool']
331+
return checklist
332+
333+
334+
335+
336+
337+
308338
@pytest.fixture
309339
def predictions(polygon_inference,
310340
rectangle_inference,

tests/integration/test_ndjon_validation.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,3 +167,7 @@ def test_validate_ndjson_uuid(tmp_path, configured_project, predictions):
167167
with pytest.raises(NDJsonError):
168168
configured_project.upload_annotations(name="name", annotations=repeat_uuid)
169169

170+
171+
def test_video_upload(video_checklist_inference, configured_project):
172+
pred = video_checklist_inference.copy()
173+
_validate_ndjson([pred], configured_project)

0 commit comments

Comments (0)