
Commit e10c922

[SN-105] Update notebooks to new export methods (#1398)
1 parent 64badb2 commit e10c922

File tree

5 files changed (+346 / -352 lines)

5 files changed

+346
-352
lines changed

examples/basics/batches.ipynb

Lines changed: 143 additions & 6 deletions
@@ -62,7 +62,8 @@
  "source": [
  "import labelbox as lb\n",
  "import random\n",
- "import uuid"
+ "import uuid\n",
+ "import json"
  ],
  "cell_type": "code",
  "outputs": [],
@@ -154,7 +155,39 @@
  {
  "metadata": {},
  "source": [
- "global_keys = [data_row.global_key for data_row in dataset.export_data_rows()]\n",
+ "client.enable_experimental = True\n",
+ "\n",
+ "export_task = dataset.export()\n",
+ "export_task.wait_till_done()\n",
+ "\n",
+ "data_rows = []\n",
+ "\n",
+ "def json_stream_handler(output: lb.JsonConverterOutput):\n",
+ "    data_row = json.loads(output.json_str)\n",
+ "    data_rows.append(data_row)\n",
+ "\n",
+ "\n",
+ "if export_task.has_errors():\n",
+ "    export_task.get_stream(\n",
+ "        converter=lb.JsonConverter(),\n",
+ "        stream_type=lb.StreamType.ERRORS\n",
+ "    ).start(stream_handler=lambda error: print(error))\n",
+ "\n",
+ "if export_task.has_result():\n",
+ "    export_json = export_task.get_stream(\n",
+ "        converter=lb.JsonConverter(),\n",
+ "        stream_type=lb.StreamType.RESULT\n",
+ "    ).start(stream_handler=json_stream_handler)"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "global_keys = [data_row[\"data_row\"][\"global_key\"] for data_row in data_rows]\n",
  "print(\"Number of global keys:\", len(global_keys))"
  ],
  "cell_type": "code",
@@ -277,23 +310,127 @@
  {
  "metadata": {},
  "source": [
- "## Manage batches\n",
+ "## Manage Batches\n",
  "Note: You can view your batch data through the **Data Rows** tab."
  ],
  "cell_type": "markdown"
  },
  {
  "metadata": {},
  "source": [
- "### View batches"
+ "### Export Batches"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "Batches need to be exported from your project via an export parameter. Before you can export from a project, you will need an ontology attached."
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "#### Create and Attach Ontology to Project"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "classification_features = [\n",
+ "    lb.Classification(\n",
+ "        class_type=lb.Classification.Type.CHECKLIST,\n",
+ "        name=\"Quality Issues\",\n",
+ "        options=[\n",
+ "            lb.Option(value=\"blurry\", label=\"Blurry\"),\n",
+ "            lb.Option(value=\"distorted\", label=\"Distorted\")\n",
+ "        ]\n",
+ "    )\n",
+ "]\n",
+ "\n",
+ "ontology_builder = lb.OntologyBuilder(\n",
+ "    tools=[],\n",
+ "    classifications=classification_features\n",
+ ")\n",
+ "\n",
+ "ontology = client.create_ontology(\n",
+ "    \"Ontology from new features\",\n",
+ "    ontology_builder.asdict(),\n",
+ "    media_type=lb.MediaType.Image\n",
+ ")\n",
+ "\n",
+ "project.setup_editor(ontology)"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "#### Export from Project"
  ],
  "cell_type": "markdown"
  },
+ {
+ "metadata": {},
+ "source": [
+ "client.enable_experimental = True\n",
+ "\n",
+ "export_params = {\n",
+ "    \"attachments\": True,\n",
+ "    \"metadata_fields\": True,\n",
+ "    \"data_row_details\": True,\n",
+ "    \"project_details\": True,\n",
+ "    \"performance_details\": True,\n",
+ "    \"batch_ids\": [batch.uid]  # Include batch ids if you only want to export specific batches; otherwise,\n",
+ "    # you can export all the data without using this parameter\n",
+ "}\n",
+ "filters = {}\n",
+ "\n",
+ "# A task is returned; this provides additional information about the status of your task, such as\n",
+ "# any errors encountered\n",
+ "export_task = project.export(params=export_params, filters=filters)\n",
+ "export_task.wait_till_done()"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "data_rows = []\n",
+ "\n",
+ "def json_stream_handler(output: lb.JsonConverterOutput):\n",
+ "    data_row = json.loads(output.json_str)\n",
+ "    data_rows.append(data_row)\n",
+ "\n",
+ "\n",
+ "if export_task.has_errors():\n",
+ "    export_task.get_stream(\n",
+ "        converter=lb.JsonConverter(),\n",
+ "        stream_type=lb.StreamType.ERRORS\n",
+ "    ).start(stream_handler=lambda error: print(error))\n",
+ "\n",
+ "if export_task.has_result():\n",
+ "    export_json = export_task.get_stream(\n",
+ "        converter=lb.JsonConverter(),\n",
+ "        stream_type=lb.StreamType.RESULT\n",
+ "    ).start(stream_handler=json_stream_handler)"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
  {
  "metadata": {},
  "source": [
  "## Export the data row IDs\n",
- "data_rows = [dr for dr in batch.export_data_rows()]\n",
+ "data_rows = [dr for dr in data_rows]\n",
  "print(\"Data rows in batch: \", data_rows)\n",
  "\n",
  "## List the batches in your project\n",
@@ -346,4 +483,4 @@
  "execution_count": null
  }
  ]
- }
+ }
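
Taken together, the cells added above amount to the following flow (a minimal sketch assembled from this diff, not the verbatim notebook; the API key placeholder and the `client`/`dataset` setup are assumed from cells outside these hunks):

```python
import json
import labelbox as lb

client = lb.Client(api_key="<YOUR API KEY>")  # placeholder; setup cell not shown in this diff
client.enable_experimental = True

data_rows = []

def json_stream_handler(output: lb.JsonConverterOutput):
    # Each streamed chunk carries one data row serialized as a JSON string.
    data_rows.append(json.loads(output.json_str))

# Dataset-level export: collect every data row, then pull out the global keys.
dataset = client.get_dataset("<ADD YOUR DATASET ID>")
export_task = dataset.export()
export_task.wait_till_done()

# Surface any export errors before consuming the result stream.
if export_task.has_errors():
    export_task.get_stream(
        converter=lb.JsonConverter(),
        stream_type=lb.StreamType.ERRORS,
    ).start(stream_handler=lambda error: print(error))

# Stream the result rows into data_rows via the handler above.
if export_task.has_result():
    export_task.get_stream(
        converter=lb.JsonConverter(),
        stream_type=lb.StreamType.RESULT,
    ).start(stream_handler=json_stream_handler)

global_keys = [dr["data_row"]["global_key"] for dr in data_rows]
print("Number of global keys:", len(global_keys))
```

The project-level cell follows the same errors-then-results streaming pattern, with `project.export(params=export_params, filters=filters)` and an optional `batch_ids` entry to scope the export to specific batches.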

examples/basics/custom_embeddings.ipynb

Lines changed: 42 additions & 4 deletions
@@ -89,7 +89,7 @@
  {
  "metadata": {},
  "source": [
- "API_KEY = \"<ADD YOUR LABELBOX CREDENTIALS>\"\n",
+ "API_KEY = \"\"\n",
  "client = lb.Client(API_KEY)\n",
  "\n",
  "# set LABELBOX_API_KEY in bash\n",
@@ -111,11 +111,49 @@
  {
  "metadata": {},
  "source": [
+ "client.enable_experimental = True\n",
+ "\n",
  "# get images from a Labelbox dataset\n",
  "# Our systems start to process data after 1000 embeddings of each type; for this demo, make sure your dataset has over 1000 data rows\n",
- "dataset = client.get_dataset(\"<ADD YOUR DATASET ID>\") \n",
- "drs = list(dataset.export_data_rows(timeout_seconds=9999))\n",
- "data_row_ids = [dr.uid for dr in drs]\n",
+ "dataset = client.get_dataset(\"<ADD YOUR DATASET ID>\")\n",
+ "\n",
+ "export_task = dataset.export()\n",
+ "export_task.wait_till_done()"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "data_rows = []\n",
+ "\n",
+ "def json_stream_handler(output: lb.JsonConverterOutput):\n",
+ "    data_row = json.loads(output.json_str)\n",
+ "    data_rows.append(data_row)\n",
+ "\n",
+ "if export_task.has_errors():\n",
+ "    export_task.get_stream(\n",
+ "        converter=lb.JsonConverter(),\n",
+ "        stream_type=lb.StreamType.ERRORS\n",
+ "    ).start(stream_handler=lambda error: print(error))\n",
+ "\n",
+ "if export_task.has_result():\n",
+ "    export_json = export_task.get_stream(\n",
+ "        converter=lb.JsonConverter(),\n",
+ "        stream_type=lb.StreamType.RESULT\n",
+ "    ).start(stream_handler=json_stream_handler)"
+ ],
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "data_row_ids = [dr[\"data_row\"][\"id\"] for dr in data_rows]\n",
+ "\n",
  "data_row_ids = data_row_ids[:1000] # keep the first 1000 examples for the sake of this demo"
  ],
  "cell_type": "code",
