
Commit 9844d5d

[SN-122] Add streamable export methods and update links (#1514)
2 parents 705a328 + 30ee877

1 file changed: +110 −18

examples/annotation_import/import_labeled_dataset_image.ipynb

@@ -3,6 +3,33 @@
 "nbformat_minor": 0,
 "metadata": {},
 "cells": [
+{
+"metadata": {},
+"source": [
+"<td>\n",
+"   <a target=\"_blank\" href=\"https://labelbox.com\" ><img src=\"https://labelbox.com/blog/content/images/2021/02/logo-v4.svg\" width=256/></a>\n",
+"</td>"
+],
+"cell_type": "markdown"
+},
+{
+"metadata": {},
+"source": [
+"<td>\n",
+"<a href=\"https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/annotation_import/import_labeled_dataset_image.ipynb\" target=\"_blank\"><img\n",
+"src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n",
+"</td>\n",
+"\n",
+"\n",
+"\n",
+"\n",
+"<td>\n",
+"<a href=\"https://github.com/Labelbox/labelbox-python/blob/master/examples/annotation_import/import_labeled_dataset_image.ipynb\" target=\"_blank\"><img\n",
+"src=\"https://img.shields.io/badge/GitHub-100000?logo=github&logoColor=white\" alt=\"GitHub\"></a>\n",
+"</td>"
+],
+"cell_type": "markdown"
+},
 {
 "metadata": {},
 "source": [
@@ -229,8 +256,43 @@
 {
 "metadata": {},
 "source": [
-"# Export data rows from the dataset\n",
-"data_rows = [dr.uid for dr in dataset.export_data_rows()]\n",
+"# Send a batch of data rows to the project"
+],
+"cell_type": "markdown"
+},
+{
+"metadata": {},
+"source": [
+"client.enable_experimental = True\n",
+"\n",
+"# Minimal ExportV2 parameters focused solely on data row IDs\n",
+"export_params = {\n",
+"    \"data_row_details\": True  # Only export data row details\n",
+"}\n",
+"\n",
+"# Initiate the streamable export task from catalog\n",
+"dataset = client.get_dataset(dataset.uid)  # Update with the actual dataset ID\n",
+"export_task = dataset.export(params=export_params)\n",
+"export_task.wait_till_done()\n",
+"print(export_task)\n",
+"\n",
+"data_rows = []\n",
+"\n",
+"# Callback used for JSON Converter to correctly collect data row IDs\n",
+"def json_stream_handler(output: lb.JsonConverterOutput):\n",
+"    # Parse the JSON string to access the data\n",
+"    data = json.loads(output.json_str)\n",
+"\n",
+"    # Correctly extract and append DataRow ID\n",
+"    if 'data_row' in data and 'id' in data['data_row']:\n",
+"        data_rows.append(data['data_row']['id'])\n",
+"\n",
+"# Process the stream if there are results\n",
+"if export_task.has_result():\n",
+"    export_task.get_stream(\n",
+"        converter=lb.JsonConverter(),\n",
+"        stream_type=lb.StreamType.RESULT\n",
+"    ).start(stream_handler=json_stream_handler)\n",
 "\n",
 "# Randomly select 200 Data Rows (or fewer if the dataset has less than 200 data rows)\n",
 "sampled_data_rows = random.sample(data_rows, min(len(data_rows), 200))\n",
@@ -250,13 +312,52 @@
 {
 "metadata": {},
 "source": [
-"queued_data_rows = project.export_queued_data_rows()\n",
-"labels = []\n",
+"# Create annotations payload"
+],
+"cell_type": "markdown"
+},
+{
+"metadata": {},
+"source": [
+"\n",
+"# Set export parameters focused on data row details\n",
+"export_params = {\n",
+"    \"data_row_details\": True,  # Only export data row details\n",
+"    \"batch_ids\": [batch.uid],  # Optional: Include batch ids to filter by specific batches\n",
+"}\n",
+"\n",
+"# Initialize the streamable export task from project\n",
+"export_task = project.export(params=export_params)\n",
+"export_task.wait_till_done()\n",
+"\n",
+"data_rows = []\n",
+"\n",
+"def json_stream_handler(output: lb.JsonConverterOutput):\n",
+"    data_row = json.loads(output.json_str)\n",
+"    data_rows.append(data_row)\n",
 "\n",
-"for datarow in queued_data_rows:\n",
+"\n",
+"if export_task.has_errors():\n",
+"    export_task.get_stream(\n",
+"\n",
+"        converter=lb.JsonConverter(),\n",
+"        stream_type=lb.StreamType.ERRORS\n",
+"    ).start(stream_handler=lambda error: print(error))\n",
+"\n",
+"if export_task.has_result():\n",
+"    export_json = export_task.get_stream(\n",
+"        converter=lb.JsonConverter(),\n",
+"        stream_type=lb.StreamType.RESULT\n",
+"    ).start(stream_handler=json_stream_handler)\n",
+"\n",
+"labels = []\n",
+"for datarow in data_rows:\n",
 "    annotations_list = []\n",
-"    folder = datarow['externalId'].split(\"/\")[0]\n",
-"    id = datarow['externalId'].split(\"/\")[1]\n",
+"    # Access the 'data_row' dictionary first\n",
+"    data_row_dict = datarow['data_row']\n",
+"    folder = data_row_dict['external_id'].split(\"/\")[0]\n",
+"    id = data_row_dict['external_id'].split(\"/\")[1]\n",
+"\n",
 "    if folder == \"positive_image_set\":\n",
 "        for image in annotations['images']:\n",
 "            if image['file_name'] == id:\n",
@@ -275,17 +376,8 @@
 "                            name=class_name,\n",
 "                            value=Rectangle(start=Point(x=bbox[0], y=bbox[1]), end=Point(x=bbox[2]+bbox[0], y=bbox[3]+bbox[1]))\n",
 "                        ))\n",
-"    image_data = ImageData(uid=datarow['id'])\n",
-"    labels.append(Label(data=image_data, annotations=annotations_list))\n"
-],
-"cell_type": "code",
-"outputs": [],
-"execution_count": null
-},
-{
-"metadata": {},
-"source": [
-"print(labels)"
+"    image_data = ImageData(uid=data_row_dict['id'])\n",
+"    labels.append(Label(data=image_data, annotations=annotations_list))"
 ],
 "cell_type": "code",
 "outputs": [],
