|
3 | 3 | "nbformat_minor": 0,
4 | 4 | "metadata": {},
5 | 5 | "cells": [
| 6 | + {
| 7 | + "metadata": {},
| 8 | + "source": [
| 9 | + "<td>\n", |
| 10 | + " <a target=\"_blank\" href=\"https://labelbox.com\" ><img src=\"https://labelbox.com/blog/content/images/2021/02/logo-v4.svg\" width=256/></a>\n", |
| 11 | + "</td>" |
| 12 | + ], |
| 13 | + "cell_type": "markdown" |
| 14 | + }, |
| 15 | + { |
| 16 | + "metadata": {}, |
| 17 | + "source": [ |
| 18 | + "<td>\n", |
| 19 | + "<a href=\"https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/annotation_import/import_labeled_dataset_image.ipynb\" target=\"_blank\"><img\n", |
| 20 | + "src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n", |
| 21 | + "</td>\n", |
| 22 | + "\n", |
| 23 | + "\n", |
| 24 | + "\n", |
| 25 | + "\n", |
| 26 | + "<td>\n", |
| 27 | + "<a href=\"https://github.com/Labelbox/labelbox-python/blob/master/examples/annotation_import/import_labeled_dataset_image.ipynb\" target=\"_blank\"><img\n", |
| 28 | + "src=\"https://img.shields.io/badge/GitHub-100000?logo=github&logoColor=white\" alt=\"GitHub\"></a>\n", |
| 29 | + "</td>" |
| 30 | + ], |
| 31 | + "cell_type": "markdown" |
| 32 | + }, |
6 | 33 | { |
7 | 34 | "metadata": {}, |
8 | 35 | "source": [ |
|
229 | 256 | { |
230 | 257 | "metadata": {}, |
231 | 258 | "source": [ |
232 | | - "# Export data rows from the dataset\n", |
233 | | - "data_rows = [dr.uid for dr in dataset.export_data_rows()]\n", |
| 259 | + "# Send a batch of data rows to the project" |
| 260 | + ], |
| 261 | + "cell_type": "markdown" |
| 262 | + }, |
| 263 | + { |
| 264 | + "metadata": {}, |
| 265 | + "source": [ |
| 266 | + "client.enable_experimental = True\n", |
| 267 | + "\n", |
| 268 | + "# Minimal ExportV2 parameters focused solely on data row IDs\n", |
| 269 | + "export_params = {\n", |
| 270 | + " \"data_row_details\": True # Only export data row details\n",
| 271 | + "}\n", |
| 272 | + "\n", |
| 273 | + "# Initiate the streamable export task from catalog\n", |
| 274 | + "dataset = client.get_dataset(dataset.uid) # Re-fetch the dataset; substitute your own dataset ID here if needed\n",
| 275 | + "export_task = dataset.export(params=export_params)\n", |
| 276 | + "export_task.wait_till_done()\n", |
| 277 | + "print(export_task)\n", |
| 278 | + "\n", |
| 279 | + "data_rows = []\n", |
| 280 | + "\n", |
| 281 | + "# Callback used with the JSON converter to collect data row IDs\n",
| 282 | + "def json_stream_handler(output: lb.JsonConverterOutput):\n", |
| 283 | + " # Parse the JSON string to access the data\n", |
| 284 | + " data = json.loads(output.json_str)\n", |
| 285 | + "\n", |
| 286 | + " # Extract and append the data row ID\n",
| 287 | + " if 'data_row' in data and 'id' in data['data_row']:\n", |
| 288 | + " data_rows.append(data['data_row']['id'])\n", |
| 289 | + "\n", |
| 290 | + "# Process the stream if there are results\n", |
| 291 | + "if export_task.has_result():\n", |
| 292 | + " export_task.get_stream(\n", |
| 293 | + " converter=lb.JsonConverter(),\n", |
| 294 | + " stream_type=lb.StreamType.RESULT\n", |
| 295 | + " ).start(stream_handler=json_stream_handler)\n", |
234 | 296 | "\n", |
235 | 297 | "# Randomly select 200 Data Rows (or fewer if the dataset has less than 200 data rows)\n", |
236 | 298 | "sampled_data_rows = random.sample(data_rows, min(len(data_rows), 200))\n", |
|
250 | 312 | { |
251 | 313 | "metadata": {}, |
252 | 314 | "source": [ |
253 | | - "queued_data_rows = project.export_queued_data_rows()\n", |
254 | | - "labels = []\n", |
| 315 | + "# Create annotations payload" |
| 316 | + ], |
| 317 | + "cell_type": "markdown" |
| 318 | + }, |
| 319 | + { |
| 320 | + "metadata": {}, |
| 321 | + "source": [ |
| 322 | + "\n", |
| 323 | + "# Set export parameters focused on data row details\n", |
| 324 | + "export_params = {\n", |
| 325 | + " \"data_row_details\": True, # Only export data row details\n", |
| 326 | + " \"batch_ids\": [batch.uid], # Optional: Include batch ids to filter by specific batches\n", |
| 327 | + "}\n", |
| 328 | + "\n", |
| 329 | + "# Initialize the streamable export task from project\n", |
| 330 | + "export_task = project.export(params=export_params)\n", |
| 331 | + "export_task.wait_till_done()\n", |
| 332 | + "\n", |
| 333 | + "data_rows = []\n", |
| 334 | + "\n", |
| 335 | + "def json_stream_handler(output: lb.JsonConverterOutput):\n", |
| 336 | + " data_row = json.loads(output.json_str)\n", |
| 337 | + " data_rows.append(data_row)\n", |
255 | 338 | "\n", |
256 | | - "for datarow in queued_data_rows:\n", |
| 339 | + "\n", |
| 340 | + "if export_task.has_errors():\n", |
| 341 | + " export_task.get_stream(\n", |
| 343 | + " converter=lb.JsonConverter(),\n", |
| 344 | + " stream_type=lb.StreamType.ERRORS\n", |
| 345 | + " ).start(stream_handler=lambda error: print(error))\n", |
| 346 | + "\n", |
| 347 | + "if export_task.has_result():\n", |
| 348 | + " export_task.get_stream(\n",
| 349 | + " converter=lb.JsonConverter(),\n", |
| 350 | + " stream_type=lb.StreamType.RESULT\n", |
| 351 | + " ).start(stream_handler=json_stream_handler)\n", |
| 352 | + "\n", |
| 353 | + "labels = []\n", |
| 354 | + "for datarow in data_rows:\n", |
257 | 355 | " annotations_list = []\n", |
258 | | - " folder = datarow['externalId'].split(\"/\")[0]\n", |
259 | | - " id = datarow['externalId'].split(\"/\")[1]\n", |
| 356 | + " # Access the 'data_row' dictionary first\n", |
| 357 | + " data_row_dict = datarow['data_row']\n", |
| 358 | + " folder = data_row_dict['external_id'].split(\"/\")[0]\n", |
| 359 | + " id = data_row_dict['external_id'].split(\"/\")[1]\n", |
| 360 | + " \n", |
260 | 361 | " if folder == \"positive_image_set\":\n", |
261 | 362 | " for image in annotations['images']:\n", |
262 | 363 | " if image['file_name'] == id:\n", |
|
275 | 376 | " name=class_name,\n", |
276 | 377 | " value=Rectangle(start=Point(x=bbox[0], y=bbox[1]), end=Point(x=bbox[2]+bbox[0], y=bbox[3]+bbox[1]))\n", |
277 | 378 | " ))\n", |
278 | | - " image_data = ImageData(uid=datarow['id'])\n", |
279 | | - " labels.append(Label(data=image_data, annotations=annotations_list))\n" |
280 | | - ], |
281 | | - "cell_type": "code", |
282 | | - "outputs": [], |
283 | | - "execution_count": null |
284 | | - }, |
285 | | - { |
286 | | - "metadata": {}, |
287 | | - "source": [ |
288 | | - "print(labels)" |
| 379 | + " image_data = ImageData(uid=data_row_dict['id'])\n", |
| 380 | + " labels.append(Label(data=image_data, annotations=annotations_list))" |
289 | 381 | ], |
290 | 382 | "cell_type": "code", |
291 | 383 | "outputs": [], |
|