import-prelabels-image-streamable-2

Sravani Mutta · Sravani Mutta · commit 181e9d64f295 · 2024-03-27T22:52:20.000+05:30
diff --git a/examples/annotation_import/import_labeled_dataset_image.ipynb b/examples/annotation_import/import_labeled_dataset_image.ipynb
@@ -229,34 +229,47 @@
     {
       "metadata": {},
       "source": [
-        "# Export data rows from the dataset\n",
-        "data_rows = [dr.uid for dr in dataset.export_data_rows()]\n",
+        "client.enable_experimental = True\n",
         "\n",
-        "# Randomly select 200 Data Rows (or fewer if the dataset has less than 200 data rows)\n",
-        "sampled_data_rows = random.sample(data_rows, min(len(data_rows), 200))\n",
+        "# Minimal ExportV2 parameters focused solely on data row IDs\n",
+        "export_params = {\n",
+        "    \"data_row_details\": True  # Only export data row details\n",
+        "}\n",
         "\n",
-        "# Create a new batch in the project and add the sampled data rows\n",
-        "batch = project.create_batch(\n",
-        "    \"Initial batch\",  # name of the batch\n",
-        "    sampled_data_rows,  # list of Data Rows\n",
-        "    1  # priority between 1-5\n",
-        ")\n",
-        "print(f\"Created batch with ID: {batch.uid}\")"
-      ],
-      "cell_type": "code",
-      "outputs": [],
-      "execution_count": null
-    },
-    {
-      "metadata": {},
-      "source": [
-        "queued_data_rows = project.export_queued_data_rows()\n",
-        "labels = []\n",
+        "# Initiate the streamable export task\n",
+        "dataset = client.get_dataset(dataset.uid)  # Re-fetch the dataset by its ID (substitute a different dataset ID here if needed)\n",
+        "export_task = dataset.export(params=export_params)\n",
+        "export_task.wait_till_done()\n",
+        "print(export_task)\n",
+        "\n",
+        "data_rows = []\n",
+        "\n",
+        "def json_stream_handler(output: lb.JsonConverterOutput):\n",
+        "  data_row = json.loads(output.json_str)\n",
+        "  data_rows.append(data_row)\n",
         "\n",
-        "for datarow in queued_data_rows:\n",
+        "\n",
+        "if export_task.has_errors():\n",
+        "  export_task.get_stream(\n",
+        "  \n",
+        "  converter=lb.JsonConverter(),\n",
+        "  stream_type=lb.StreamType.ERRORS\n",
+        "  ).start(stream_handler=lambda error: print(error))\n",
+        "\n",
+        "if export_task.has_result():\n",
+        "  export_json = export_task.get_stream(\n",
+        "    converter=lb.JsonConverter(),\n",
+        "    stream_type=lb.StreamType.RESULT\n",
+        "  ).start(stream_handler=json_stream_handler)\n",
+        "\n",
+        "labels = []\n",
+        "for datarow in data_rows:\n",
         "    annotations_list = []\n",
-        "    folder = datarow['externalId'].split(\"/\")[0]\n",
-        "    id = datarow['externalId'].split(\"/\")[1]\n",
+        "    # Access the 'data_row' dictionary first\n",
+        "    data_row_dict = datarow['data_row']\n",
+        "    folder = data_row_dict['external_id'].split(\"/\")[0]\n",
+        "    id = data_row_dict['external_id'].split(\"/\")[1]\n",
+        "    \n",
         "    if folder == \"positive_image_set\":\n",
         "        for image in annotations['images']:\n",
         "            if image['file_name'] == id:\n",
@@ -271,21 +284,12 @@
         "                                class_name = category['name']\n",
         "                                break\n",
         "                        if class_name:\n",
-        "                            annotations_list.append(ObjectAnnotation(\n",
+        "                          annotations_list.append(ObjectAnnotation(\n",
         "                                name=class_name,\n",
         "                                value=Rectangle(start=Point(x=bbox[0], y=bbox[1]), end=Point(x=bbox[2]+bbox[0], y=bbox[3]+bbox[1]))\n",
         "                            ))\n",
-        "    image_data = ImageData(uid=datarow['id'])\n",
-        "    labels.append(Label(data=image_data, annotations=annotations_list))\n"
-      ],
-      "cell_type": "code",
-      "outputs": [],
-      "execution_count": null
-    },
-    {
-      "metadata": {},
-      "source": [
-        "print(labels)"
+        "    image_data = ImageData(uid=data_row_dict['id'])\n",
+        "    labels.append(Label(data=image_data, annotations=annotations_list))"
       ],
       "cell_type": "code",
       "outputs": [],