Skip to content

Commit f926f5a

Browse files
authored
Merge pull request #1006 from Labelbox/ao_global_keys_notebooks
- Replace data row ids with global keys - Replace create_data_row with create_data_rows - Added task methods for failed data rows - Updated text - Added data class types for creating labels - Added new supported annotations (NER annotation type for PDF) - Added newly supported relationship annotation to image notebook - Refactor video notebook to include annotation types. Pending: HTML annotation import notebook; add annotation types to the video prediction and PDF prediction notebooks.
2 parents 422dd6d + 13e09b0 commit f926f5a

File tree

11 files changed

+776
-1000
lines changed

11 files changed

+776
-1000
lines changed

examples/annotation_import/conversational.ipynb

Lines changed: 72 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -78,17 +78,7 @@
7878
"!pip install -q 'labelbox[data]'"
7979
],
8080
"cell_type": "code",
81-
"outputs": [
82-
{
83-
"name": "stdout",
84-
"output_type": "stream",
85-
"text": [
86-
"\u001b[K |\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 185 kB 29.6 MB/s \n",
87-
"\u001b[K |\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 7.8 MB 53.9 MB/s \n",
88-
"\u001b[?25h Building wheel for pygeotile (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
89-
]
90-
}
91-
],
81+
"outputs": [],
9282
"execution_count": null
9383
},
9484
{
@@ -102,7 +92,8 @@
10292
"metadata": {},
10393
"source": [
10494
"import labelbox as lb\n",
105-
"import uuid"
95+
"import uuid\n",
96+
"import labelbox.types as lb_types"
10697
],
10798
"cell_type": "code",
10899
"outputs": [],
@@ -120,7 +111,7 @@
120111
"metadata": {},
121112
"source": [
122113
"# Add your api key\n",
123-
"API_KEY = None\n",
114+
"API_KEY = \"\"\n",
124115
"client = lb.Client(api_key=API_KEY)"
125116
],
126117
"cell_type": "code",
@@ -146,32 +137,61 @@
146137
"source": [
147138
"# message based classifications\n",
148139
"\n",
149-
"ner_annotation = { \n",
140+
"ner_annotation_ndjson = { \n",
150141
" \"name\": \"ner\",\n",
151142
" \"location\": { \n",
152143
" \"start\": 0, \n",
153144
" \"end\": 8 \n",
154145
" },\n",
155146
" \"messageId\": \"4\"\n",
156-
" }\n",
147+
" }"
148+
],
149+
"cell_type": "code",
150+
"outputs": [],
151+
"execution_count": null
152+
},
153+
{
154+
"metadata": {},
155+
"source": [
156+
"##### Classification free text #####\n",
157157
"\n",
158-
"text_annotation = {\n",
158+
"# Only supported with NDJSON\n",
159+
"text_annotation_ndjson = {\n",
159160
" 'name': 'text_convo',\n",
160161
" 'answer': 'the answer to the text questions right here',\n",
161162
" 'messageId': \"0\"\n",
162-
"}\n",
163-
"\n",
163+
"}"
164+
],
165+
"cell_type": "code",
166+
"outputs": [],
167+
"execution_count": null
168+
},
169+
{
170+
"metadata": {},
171+
"source": [
172+
"##### Checklist Classification ####### \n",
164173
"\n",
165-
"checklist_annotation = {\n",
174+
"# Only supported with NDJSON\n",
175+
"checklist_annotation_ndjson = {\n",
166176
" 'name': 'checklist_convo',\n",
167177
" 'answers': [\n",
168178
" {'name': 'first_checklist_answer'},\n",
169179
" {'name': 'second_checklist_answer'}\n",
170180
" ],\n",
171181
" 'messageId': '2'\n",
172-
"}\n",
182+
"}"
183+
],
184+
"cell_type": "code",
185+
"outputs": [],
186+
"execution_count": null
187+
},
188+
{
189+
"metadata": {},
190+
"source": [
191+
"######## Radio Classification ######\n",
173192
"\n",
174-
"radio_annotation = {\n",
193+
"# Only supported with NDJSON\n",
194+
"radio_annotation_ndjson = {\n",
175195
" 'name': 'radio_convo',\n",
176196
" 'answer': {\n",
177197
" 'name': 'first_radio_answer'\n",
@@ -201,47 +221,32 @@
201221
"metadata": {},
202222
"source": [
203223
"# Create one Labelbox dataset\n",
204-
"dataset = client.create_dataset(name=\"conversational_annotation_import_demo_dataset\")\n",
224+
"\n",
225+
"global_key = \"conversation-1.json\"\n",
205226
"\n",
206227
"asset = {\n",
207228
" \"row_data\": \"https://storage.googleapis.com/labelbox-developer-testing-assets/conversational_text/1000-conversations/conversation-1.json\",\n",
208-
" \"global_key\": str(uuid.uuid1())\n",
229+
" \"global_key\": global_key\n",
209230
"}\n",
210231
"\n",
211-
"\n",
212-
"data_row = dataset.create_data_row(asset)\n",
213-
"print(data_row)\n"
232+
"dataset = client.create_dataset(name=\"conversational_annotation_import_demo_dataset\")\n",
233+
"task = dataset.create_data_rows([asset])\n",
234+
"task.wait_till_done()\n",
235+
"print(\"Errors:\", task.errors)\n",
236+
"print(\"Failed data rows: \", task.failed_data_rows)"
214237
],
215238
"cell_type": "code",
216-
"outputs": [
217-
{
218-
"name": "stdout",
219-
"output_type": "stream",
220-
"text": [
221-
"<DataRow {\n",
222-
" \"created_at\": \"2022-12-23 20:18:48+00:00\",\n",
223-
" \"external_id\": null,\n",
224-
" \"global_key\": \"0206acac-82ff-11ed-a415-0242ac1c000c\",\n",
225-
" \"media_attributes\": {},\n",
226-
" \"metadata\": [],\n",
227-
" \"metadata_fields\": [],\n",
228-
" \"row_data\": \"https://storage.googleapis.com/labelbox-developer-testing-assets/conversational_text/1000-conversations/conversation-1.json\",\n",
229-
" \"uid\": \"clc0ygvde029307yn96gv2byu\",\n",
230-
" \"updated_at\": \"2022-12-23 20:18:48+00:00\"\n",
231-
"}>\n"
232-
]
233-
}
234-
],
239+
"outputs": [],
235240
"execution_count": null
236241
},
237242
{
238243
"metadata": {},
239244
"source": [
240245
"## Step 2: Create/select an ontology\n",
241246
"\n",
242-
"Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name`/`instructions` fields in your annotations to ensure the correct feature schemas are matched.\n",
247+
"Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name` fields in your annotations to ensure the correct feature schemas are matched.\n",
243248
"\n",
244-
"For example, when we create the bounding box annotation [above](https://colab.research.google.com/drive/1rFv-VvHUBbzFYamz6nSMRJz1mEg6Ukqq#scrollTo=3umnTd-MfI0o&line=1&uniqifier=1), we provided the `name` as `text_convo`. Now, when we setup our ontology, we must ensure that the name of my bounding box tool is also `checklist_convo`. The same alignment must hold true for the other tools and classifications we create in our ontology."
249+
"For example, when we create the text annotation, we provided the `name` as `text_convo`. Now, when we setup our ontology, we must ensure that the name of the tool is also `text_convo`. The same alignment must hold true for the other tools and classifications we create in our ontology."
245250
],
246251
"cell_type": "markdown"
247252
},
@@ -254,11 +259,11 @@
254259
" tool=lb.Tool.Type.NER, \n",
255260
" name=\"ner\")], \n",
256261
" classifications=[ \n",
257-
" lb.Classification( # Text classification given the name \"text\"\n",
262+
" lb.Classification( \n",
258263
" class_type=lb.Classification.Type.TEXT,\n",
259264
" scope=lb.Classification.Scope.INDEX, \n",
260265
" instructions=\"text_convo\"), \n",
261-
" lb.Classification( # Checklist classification given the name \"text\" with two options: \"first_checklist_answer\" and \"second_checklist_answer\"\n",
266+
" lb.Classification( \n",
262267
" class_type=lb.Classification.Type.CHECKLIST, \n",
263268
" scope=lb.Classification.Scope.INDEX, \n",
264269
" instructions=\"checklist_convo\", \n",
@@ -267,7 +272,7 @@
267272
" lb.Option(value=\"second_checklist_answer\") \n",
268273
" ]\n",
269274
" ), \n",
270-
" lb.Classification( # Radio classification given the name \"text\" with two options: \"first_radio_answer\" and \"second_radio_answer\"\n",
275+
" lb.Classification( \n",
271276
" class_type=lb.Classification.Type.RADIO, \n",
272277
" instructions=\"radio_convo\", \n",
273278
" scope=lb.Classification.Scope.INDEX, \n",
@@ -277,7 +282,9 @@
277282
" ]\n",
278283
" )\n",
279284
" ]\n",
280-
")"
285+
")\n",
286+
"\n",
287+
"ontology = client.create_ontology(\"Ontology Conversation Annotations\", ontology_builder.asdict(), media_type=lb.MediaType.Conversational)"
281288
],
282289
"cell_type": "code",
283290
"outputs": [],
@@ -296,13 +303,11 @@
296303
"metadata": {},
297304
"source": [
298305
"# Create Labelbox project\n",
299-
"project = client.create_project(name=\"conversational_mal_project\", \n",
306+
"project = client.create_project(name=\"conversational_project\", \n",
300307
" media_type=lb.MediaType.Conversational)\n",
301308
"\n",
302-
"# Setup your ontology / labeling editor\n",
303-
"editor = next(client.get_labeling_frontends(where=lb.LabelingFrontend.name == \"Editor\")) # Unless using a custom editor, do not modify this\n",
304-
"\n",
305-
"project.setup(editor, ontology_builder.asdict()) # Connect your ontology and editor to your project\n"
309+
"# Setup your ontology \n",
310+
"project.setup_editor(ontology) # Connect your ontology and editor to your project"
306311
],
307312
"cell_type": "code",
308313
"outputs": [
@@ -331,8 +336,8 @@
331336
"# Create a batch to send to your MAL project\n",
332337
"batch = project.create_batch(\n",
333338
" \"first-batch-convo-demo\", # Each batch in a project must have a unique name\n",
334-
" [data_row.uid], # Paginated collection of data row objects\n",
335-
" 5 # priority between 1(Highest) - 5(lowest)\n",
339+
" global_keys=global_key, # Paginated collection of data row objects, list of data row ids or global keys\n",
340+
" priority=5 # priority between 1(Highest) - 5(lowest)\n",
336341
")\n",
337342
"\n",
338343
"print(\"Batch: \", batch)"
@@ -378,13 +383,13 @@
378383
"metadata": {},
379384
"source": [
380385
"label_ndjson = []\n",
381-
"for annotations in [ner_annotation,\n",
382-
" text_annotation,\n",
383-
" checklist_annotation,\n",
384-
" radio_annotation]:\n",
386+
"for annotations in [ner_annotation_ndjson,\n",
387+
" text_annotation_ndjson,\n",
388+
" checklist_annotation_ndjson,\n",
389+
" radio_annotation_ndjson]:\n",
385390
" annotations.update({\n",
386391
" 'dataRow': {\n",
387-
" 'id': data_row.uid\n",
392+
" 'globalKey': global_key\n",
388393
" }\n",
389394
" })\n",
390395
" label_ndjson.append(annotations)"
@@ -393,42 +398,6 @@
393398
"outputs": [],
394399
"execution_count": null
395400
},
396-
{
397-
"metadata": {},
398-
"source": [
399-
"label_ndjson"
400-
],
401-
"cell_type": "code",
402-
"outputs": [
403-
{
404-
"data": {
405-
"text/plain": [
406-
"[{'name': 'ner',\n",
407-
" 'location': {'start': 0, 'end': 8},\n",
408-
" 'messageId': '4',\n",
409-
" 'dataRow': {'id': 'clc0ygvde029307yn96gv2byu'}},\n",
410-
" {'name': 'text_convo',\n",
411-
" 'answer': 'the answer to the text questions right here',\n",
412-
" 'messageId': '0',\n",
413-
" 'dataRow': {'id': 'clc0ygvde029307yn96gv2byu'}},\n",
414-
" {'name': 'checklist_convo',\n",
415-
" 'answers': [{'name': 'first_checklist_answer'},\n",
416-
" {'name': 'second_checklist_answer'}],\n",
417-
" 'messageId': '2',\n",
418-
" 'dataRow': {'id': 'clc0ygvde029307yn96gv2byu'}},\n",
419-
" {'name': 'radio_convo',\n",
420-
" 'answer': {'name': 'first_radio_answer'},\n",
421-
" 'messageId': '0',\n",
422-
" 'dataRow': {'id': 'clc0ygvde029307yn96gv2byu'}}]"
423-
]
424-
},
425-
"execution_count": 13,
426-
"metadata": {},
427-
"output_type": "execute_result"
428-
}
429-
],
430-
"execution_count": null
431-
},
432401
{
433402
"metadata": {},
434403
"source": [
@@ -456,7 +425,7 @@
456425
"\n",
457426
"upload_job.wait_until_done();\n",
458427
"print(\"Errors:\", upload_job.errors)\n",
459-
"print(\" \")"
428+
"print(\"Status of uploads: \", upload_job.statuses)"
460429
],
461430
"cell_type": "code",
462431
"outputs": [
@@ -471,42 +440,6 @@
471440
],
472441
"execution_count": null
473442
},
474-
{
475-
"metadata": {},
476-
"source": [
477-
"annotations"
478-
],
479-
"cell_type": "code",
480-
"outputs": [
481-
{
482-
"data": {
483-
"text/plain": [
484-
"[{'name': 'ner', 'location': {'start': 0, 'end': 8}, 'messageId': '4'},\n",
485-
" {'name': 'text_convo',\n",
486-
" 'answer': 'the answer to the text questions right here',\n",
487-
" 'uuid': '0ae2b42b-0e01-4bd6-8e4f-5ebfe6402a05',\n",
488-
" 'dataRow': {'id': 'clc0okhr74aq607yb6fv83crl'},\n",
489-
" 'messageId': '0'},\n",
490-
" {'name': 'checklist_convo',\n",
491-
" 'uuid': '8a382c09-da4c-455f-80ee-16fb05165e4a',\n",
492-
" 'answers': [{'name': 'first_checklist_answer'},\n",
493-
" {'name': 'second_checklist_answer'}],\n",
494-
" 'dataRow': {'id': 'clc0okhr74aq607yb6fv83crl'},\n",
495-
" 'messageId': '2'},\n",
496-
" {'name': 'radio_convo',\n",
497-
" 'uuid': '515e4f05-6cf1-4e8c-b183-d633c49f5106',\n",
498-
" 'dataRow': {'id': 'clc0okhr74aq607yb6fv83crl'},\n",
499-
" 'answer': {'name': 'first_radio_answer'},\n",
500-
" 'messageId': '0'}]"
501-
]
502-
},
503-
"execution_count": 45,
504-
"metadata": {},
505-
"output_type": "execute_result"
506-
}
507-
],
508-
"execution_count": null
509-
},
510443
{
511444
"metadata": {},
512445
"source": [
@@ -521,12 +454,13 @@
521454
"upload_job = lb.LabelImport.create_from_objects(\n",
522455
" client = client, \n",
523456
" project_id = project.uid, \n",
524-
" name=\"label_geo_import_job\"+str(uuid.uuid4()), \n",
457+
" name=\"label_import_job\"+str(uuid.uuid4()), \n",
525458
" # user label_ndjson if labels were created using python annotation tools\n",
526459
" labels=label_ndjson)\n",
527460
"\n",
528461
"upload_job.wait_until_done();\n",
529-
"print(\"Errors:\", upload_job.errors)"
462+
"print(\"Errors:\", upload_job.errors)\n",
463+
"print(\"Status of uploads: \", upload_job.statuses)"
530464
],
531465
"cell_type": "code",
532466
"outputs": [

0 commit comments

Comments (0)