Skip to content

Commit 7ffa109

Browse files
authored
[SN-120] video composite mask (#1443)
Added composite mask examples in video
1 parent 6435b77 commit 7ffa109

File tree

2 files changed

+108
-73
lines changed

2 files changed

+108
-73
lines changed

examples/annotation_import/image.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,11 @@
3434
"metadata": {},
3535
"source": [
3636
"# Image annotation import\n",
37-
"This notebook will provide examples of each supported annotation type for image assets. \n",
37+
"This notebook will provide examples of each supported annotation type for image assets.\n",
3838
"\n",
3939
"### [Model-assisted labeling (MAL)](https://docs.labelbox.com/docs/model-assisted-labeling)\n",
4040
"\n",
41-
"* This workflow allows you to import computer-generated predictions (or simply annotations created outside of Labelbox) as pre-labels on an asset. \n",
41+
"* This workflow allows you to import computer-generated predictions (or simply annotations created outside of Labelbox) as pre-labels on an asset.\n",
4242
"\n",
4343
"The imported annotations will be pre-populated in the labeling editor. However, in order to convert the pre-labels to real annotations, a human labeler will still need to open the Data Row in the Editor and submit it. This functionality is designed to speed up human labeling.\n",
4444
"\n",
@@ -264,7 +264,7 @@
264264
{
265265
"metadata": {},
266266
"source": [
267-
"### Classification: Free-form text "
267+
"### Classification: Free-form text"
268268
],
269269
"cell_type": "markdown"
270270
},

examples/annotation_import/video.ipynb

Lines changed: 105 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,15 @@
6767
{
6868
"metadata": {},
6969
"source": [
70-
"import labelbox as lb\n",
71-
"import labelbox.types as lb_types\n",
7270
"import uuid\n",
71+
"from PIL import Image\n",
72+
"import requests\n",
7373
"import base64\n",
74-
"import requests"
74+
"import labelbox as lb\n",
75+
"import labelbox.types as lb_types\n",
76+
"from io import BytesIO\n",
77+
"import pprint\n",
78+
"pp = pprint.PrettyPrinter(indent=4)"
7579
],
7680
"cell_type": "code",
7781
"outputs": [],
@@ -675,82 +679,109 @@
675679
],
676680
"cell_type": "markdown"
677681
},
682+
{
683+
"metadata": {},
684+
"source": [
685+
"def extract_rgb_colors_from_url(image_url):\n",
686+
" response = requests.get(image_url)\n",
687+
" img = Image.open(BytesIO(response.content))\n",
688+
"\n",
689+
" colors = set()\n",
690+
" for x in range(img.width):\n",
691+
" for y in range(img.height):\n",
692+
" pixel = img.getpixel((x, y))\n",
693+
" if pixel[:3] != (0,0,0):\n",
694+
" colors.add(pixel[:3]) # Get only the RGB values\n",
695+
"\n",
696+
" return colors"
697+
],
698+
"cell_type": "code",
699+
"outputs": [],
700+
"execution_count": null
701+
},
678702
{
679703
"metadata": {},
680704
"source": [
681705
"### Raster Segmentation (Byte string array)\n",
682-
"url = \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/color_mask.png\"\n",
683-
"response = requests.get(url)\n",
706+
"## For this example we are going to pass the whole annotations payload in a single VideoMaskAnnotation\n",
684707
"\n",
685708
"\n",
686-
"video_mask_annotation_bytes = [\n",
687-
" lb_types.VideoMaskAnnotation(\n",
688-
" frames=[\n",
689-
" lb_types.MaskFrame(\n",
690-
" index=20,\n",
691-
" im_bytes=response.content # Instead of bytes you could also pass an instance URI : instance_uri=url\n",
692-
" )\n",
693-
" ],\n",
694-
" instances=[\n",
695-
" lb_types.MaskInstance(color_rgb=(255, 255, 1), name= \"video_mask\")\n",
696-
" ]\n",
709+
"# Single mask\n",
710+
"url = \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/frame_24_composite_mask.png\"\n",
711+
"response = requests.get(url)\n",
712+
"img_bytes = base64.b64encode(response.content).decode('utf-8')\n",
713+
"\n",
714+
"# We are generating our frames and instances in this step, and will later add them to the VideoMaskAnnotation that will contain\n",
715+
"# all frames and instances\n",
716+
"frames_mask_single=[\n",
717+
" lb_types.MaskFrame(\n",
718+
" index=20,\n",
719+
" im_bytes=response.content # Instead of bytes you could also pass an instance URI : instance_uri=url\n",
697720
" )\n",
698721
"]\n",
699-
"img_bytes = base64.b64encode(response.content).decode('utf-8')\n",
700-
"# NDJSON\n",
701-
"video_mask_ndjson_bytes = {\n",
702-
" 'masks': {\n",
703-
" 'frames': [\n",
704-
" {\n",
705-
" \"index\" : 20,\n",
706-
" \"imBytes\": img_bytes,\n",
707-
" }\n",
708-
" ],\n",
709-
" 'instances': [\n",
710-
" {\n",
711-
" \"colorRGB\" : [255, 255, 1],\n",
712-
" \"name\" : \"video_mask\"\n",
713-
" }\n",
714-
" ]\n",
715-
" }\n",
716-
" }\n",
722+
"instances_mask_single=[\n",
723+
" lb_types.MaskInstance(color_rgb=(76, 104, 177), name= \"video_mask\")\n",
724+
"]\n",
717725
"\n",
718-
"# Python annotation - same mask on multiple frames (note that tracking is not supported with masks tools)\n",
719-
"video_mask_annotation_bytes_2 = [\n",
720-
" lb_types.VideoMaskAnnotation(\n",
721-
" frames=[\n",
722-
" lb_types.MaskFrame(\n",
723-
" index=23,\n",
724-
" im_bytes=response.content\n",
725-
" ),\n",
726+
"\n",
727+
"## Add multiple masks using multiple tools in different frames - Note that only one composite mask can exist per frame\n",
728+
"frames_cp_mask_url = [\n",
729+
" {\"1\": \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/frame_1_composite_mask.png\"},\n",
730+
" {\"24\": \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/frame_24_composite_mask.png\"},\n",
731+
" {\"26\": \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/frame_26_composite_mask.png\" }\n",
732+
"]\n",
733+
"\n",
734+
"rgb_mask_tool = [(227, 135, 126) ,(169, 248, 152),(83, 152, 103)]\n",
735+
"cp_masks = []\n",
736+
"unique_colors = set()\n",
737+
"\n",
738+
"\n",
739+
"lb_frames = []\n",
740+
"lb_instances = []\n",
741+
"counter = 0\n",
742+
"\n",
743+
"for d in frames_cp_mask_url:\n",
744+
" for frame_no, v in d.items():\n",
745+
" response = requests.get(v)\n",
746+
" colors = extract_rgb_colors_from_url(v)\n",
747+
" for color in colors:\n",
748+
" if not color in unique_colors:\n",
749+
" unique_colors.add(color)\n",
750+
" name = \"video_mask\" if color in rgb_mask_tool else \"mask_with_text_subclass\"\n",
751+
" lb_instances.append(lb_types.MaskInstance(color_rgb=color, name=name))\n",
752+
" counter += 1\n",
753+
" lb_frames.append(\n",
726754
" lb_types.MaskFrame(\n",
727-
" index=20,\n",
755+
" index=frame_no,\n",
728756
" im_bytes=response.content\n",
729757
" )\n",
730-
" ],\n",
731-
" instances=[\n",
732-
" lb_types.MaskInstance(color_rgb=(255, 1, 1), name= \"video_mask\")\n",
733-
" ]\n",
734-
" )\n",
735-
"]\n",
758+
" )\n",
759+
"cp_masks.append(lb_types.VideoMaskAnnotation(\n",
760+
" frames=lb_frames + frames_mask_single,\n",
761+
" instances=lb_instances + instances_mask_single\n",
762+
"))\n",
736763
"\n",
764+
"pp.pprint(lb_frames)\n",
765+
"pp.pprint(cp_masks)\n",
737766
"\n",
738-
"# NDJSON\n",
767+
"\n",
768+
"\n",
769+
"# NDJSON - single tool\n",
739770
"video_mask_ndjson_bytes_2 = {\n",
740771
" 'masks': {\n",
741772
" 'frames': [\n",
742773
" {\n",
743-
" \"index\" : 20,\n",
774+
" \"index\" : 31,\n",
744775
" \"imBytes\": img_bytes,\n",
745776
" },\n",
746777
" {\n",
747-
" \"index\" : 23,\n",
778+
" \"index\" : 34,\n",
748779
" \"imBytes\": img_bytes,\n",
749780
" }\n",
750781
" ],\n",
751782
" 'instances': [\n",
752783
" {\n",
753-
" \"colorRGB\" : [255, 1, 1],\n",
784+
" \"colorRGB\" : [76, 104, 177],\n",
754785
" \"name\" : \"video_mask\"\n",
755786
" }\n",
756787
" ]\n",
@@ -886,8 +917,15 @@
886917
")\n",
887918
"task = dataset.create_data_rows([asset])\n",
888919
"task.wait_till_done()\n",
889-
"print(\"Errors :\",task.errors)\n",
890-
"print(\"Failed data rows:\" ,task.failed_data_rows)"
920+
"print(f\"Failed data rows: {task.failed_data_rows}\")\n",
921+
"print(f\"Errors: {task.errors}\")\n",
922+
"\n",
923+
"if task.errors:\n",
924+
" for error in task.errors:\n",
925+
" if 'Duplicate global key' in error['message'] and dataset.row_count == 0:\n",
926+
" # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n",
927+
" print(f\"Deleting empty dataset: {dataset}\")\n",
928+
" dataset.delete()"
891929
],
892930
"cell_type": "code",
893931
"outputs": [],
@@ -928,7 +966,15 @@
928966
" ]\n",
929967
" )\n",
930968
" ]\n",
931-
" )\n",
969+
" ),\n",
970+
" lb.Tool(tool=lb.Tool.Type.RASTER_SEGMENTATION,\n",
971+
" name=\"mask_with_text_subclass\",\n",
972+
" classifications=[\n",
973+
" lb.Classification(\n",
974+
" class_type=lb.Classification.Type.TEXT,\n",
975+
" name=\"sub_free_text\")\n",
976+
" ]\n",
977+
" )\n",
932978
" ],\n",
933979
" classifications=[\n",
934980
" lb.Classification(\n",
@@ -1088,8 +1134,7 @@
10881134
" nested_checklist_annotation,\n",
10891135
" nested_radio_annotation,\n",
10901136
" text_annotation,\n",
1091-
" video_mask_annotation_bytes,\n",
1092-
" video_mask_annotation_bytes_2\n",
1137+
" cp_masks\n",
10931138
" ]\n",
10941139
"\n",
10951140
"for annotation in annotations_list:\n",
@@ -1138,9 +1183,7 @@
11381183
" text_annotation_ndjson,\n",
11391184
" bbox_frame_annotation_ndjson,\n",
11401185
" bbox_frame_annotation_ndjson2,\n",
1141-
" video_mask_ndjson_bytes,\n",
1142-
" video_mask_ndjson_bytes_2,\n",
1143-
"\n",
1186+
" video_mask_ndjson_bytes_2\n",
11441187
"]\n",
11451188
"\n",
11461189
"for annotation in annotations_list_ndjson:\n",
@@ -1229,19 +1272,11 @@
12291272
"source": [
12301273
"# Delete Project\n",
12311274
"# project.delete()\n",
1232-
"# dataset.delete()\n",
1233-
"\n"
1275+
"#dataset.delete()\n"
12341276
],
12351277
"cell_type": "code",
12361278
"outputs": [],
12371279
"execution_count": null
1238-
},
1239-
{
1240-
"metadata": {},
1241-
"source": [],
1242-
"cell_type": "code",
1243-
"outputs": [],
1244-
"execution_count": null
12451280
}
12461281
]
12471282
}

0 commit comments

Comments
 (0)