|
226 | 226 | "outputs": [], |
227 | 227 | "execution_count": null |
228 | 228 | }, |
| 229 | + { |
| 230 | + "metadata": {}, |
| 231 | + "source": [ |
| 232 | + "# Send a batch of data rows to the project" |
| 233 | + ], |
| 234 | + "cell_type": "markdown" |
| 235 | + }, |
229 | 236 | { |
230 | 237 | "metadata": {}, |
231 | 238 | "source": [ |
|
236 | 243 | " \"data_row_details\": True # Only export data row details \n", |
237 | 244 | "}\n", |
238 | 245 | "\n", |
239 | | - "# Initiate the streamable export task\n", |
| 246 | + "# Initiate the streamable export task from catalog\n", |
240 | 247 | "dataset = client.get_dataset(dataset.uid) # Update with the actual dataset ID\n", |
241 | 248 | "export_task = dataset.export(params=export_params)\n", |
242 | 249 | "export_task.wait_till_done()\n", |
243 | 250 | "print(export_task)\n", |
244 | 251 | "\n", |
245 | 252 | "data_rows = []\n", |
246 | 253 | "\n", |
| 254 | + "# Callback used for JSON Converter to correctly collect data row IDs\n", |
| 255 | + "def json_stream_handler(output: lb.JsonConverterOutput):\n", |
| 256 | + " # Parse the JSON string to access the data\n", |
| 257 | + " data = json.loads(output.json_str)\n", |
| 258 | + "\n", |
| 259 | + " # Correctly extract and append DataRow ID\n", |
| 260 | + " if 'data_row' in data and 'id' in data['data_row']:\n", |
| 261 | + " data_rows.append(data['data_row']['id'])\n", |
| 262 | + "\n", |
| 263 | + "# Process the stream if there are results\n", |
| 264 | + "if export_task.has_result():\n", |
| 265 | + " export_task.get_stream(\n", |
| 266 | + " converter=lb.JsonConverter(),\n", |
| 267 | + " stream_type=lb.StreamType.RESULT\n", |
| 268 | + " ).start(stream_handler=json_stream_handler)\n", |
| 269 | + "\n", |
| 270 | + "# Randomly select 200 Data Rows (or fewer if the dataset has less than 200 data rows)\n", |
| 271 | + "sampled_data_rows = random.sample(data_rows, min(len(data_rows), 200))\n", |
| 272 | + "\n", |
| 273 | + "# Create a new batch in the project and add the sampled data rows\n", |
| 274 | + "batch = project.create_batch(\n", |
| 275 | + " \"Initial batch\", # name of the batch\n", |
| 276 | + " sampled_data_rows, # list of Data Rows\n", |
| 277 | + " 1 # priority between 1-5\n", |
| 278 | + ")\n", |
| 279 | + "print(f\"Created batch with ID: {batch.uid}\")" |
| 280 | + ], |
| 281 | + "cell_type": "code", |
| 282 | + "outputs": [], |
| 283 | + "execution_count": null |
| 284 | + }, |
| 285 | + { |
| 286 | + "metadata": {}, |
| 287 | + "source": [ |
| 288 | + "# Create annotations payload" |
| 289 | + ], |
| 290 | + "cell_type": "markdown" |
| 291 | + }, |
| 292 | + { |
| 293 | + "metadata": {}, |
| 294 | + "source": [ |
| 295 | + "\n", |
| 296 | + "# Set export parameters focused on data row details\n", |
| 297 | + "export_params = {\n", |
| 298 | + " \"data_row_details\": True, # Only export data row details\n", |
| 299 | + " \"batch_ids\": [batch.uid], # Optional: Include batch ids to filter by specific batches\n", |
| 300 | + "}\n", |
| 301 | + "\n", |
| 302 | + "# Initialize the streamable export task from project\n", |
| 303 | + "export_task = project.export(params=export_params)\n", |
| 304 | + "export_task.wait_till_done()\n", |
| 305 | + "\n", |
| 306 | + "data_rows = []\n", |
| 307 | + "\n", |
247 | 308 | "def json_stream_handler(output: lb.JsonConverterOutput):\n", |
248 | 309 | " data_row = json.loads(output.json_str)\n", |
249 | 310 | " data_rows.append(data_row)\n", |
|
284 | 345 | " class_name = category['name']\n", |
285 | 346 | " break\n", |
286 | 347 | " if class_name:\n", |
287 | | - " annotations_list.append(ObjectAnnotation(\n", |
| 348 | + " annotations_list.append(ObjectAnnotation(\n", |
288 | 349 | " name=class_name,\n", |
289 | 350 | " value=Rectangle(start=Point(x=bbox[0], y=bbox[1]), end=Point(x=bbox[2]+bbox[0], y=bbox[3]+bbox[1]))\n", |
290 | 351 | " ))\n", |
|