Skip to content

Commit fbc25f2

Browse files
ovalle15
authored and committed
Added NER and BBOX with subclass ndjson
1 parent 7746ed1 commit fbc25f2

File tree

2 files changed

+145
-2
lines changed

2 files changed

+145
-2
lines changed

examples/annotation_import/conversational.ipynb

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,14 @@
136136
"metadata": {},
137137
"source": [
138138
"# message based classifications\n",
139+
"ner_annotation = lb_types.ObjectAnnotation(\n",
140+
" name=\"ner\",\n",
141+
" value=lb_types.ConversationEntity(\n",
142+
" start=0,\n",
143+
" end=8,\n",
144+
" message_id=\"4\"\n",
145+
" )\n",
146+
")\n",
139147
"\n",
140148
"ner_annotation_ndjson = { \n",
141149
" \"name\": \"ner\",\n",
@@ -348,6 +356,33 @@
348356
],
349357
"cell_type": "markdown"
350358
},
359+
{
360+
"metadata": {},
361+
"source": [
362+
"#### Python annotation\n",
363+
"Here we create the complete label using only the Python annotation types (no NDJSON). The label's `annotations` list contains a reference to each annotation object that we created above."
364+
],
365+
"cell_type": "markdown"
366+
},
367+
{
368+
"metadata": {},
369+
"source": [
370+
"label = []\n",
371+
"label.append(\n",
372+
" lb_types.Label(\n",
373+
" data=lb_types.ConversationData(\n",
374+
" global_key=global_key\n",
375+
" ),\n",
376+
" annotations=[\n",
377+
" ner_annotation\n",
378+
" ]\n",
379+
" )\n",
380+
")"
381+
],
382+
"cell_type": "code",
383+
"outputs": [],
384+
"execution_count": null
385+
},
351386
{
352387
"metadata": {},
353388
"source": [

examples/annotation_import/pdf.ipynb

Lines changed: 110 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,62 @@
273273
"outputs": [],
274274
"execution_count": null
275275
},
276+
{
277+
"metadata": {},
278+
"source": [
279+
"######### BBOX with nested classifications #########\n",
280+
"\n",
281+
"bbox_with_radio_subclass_annotation_ndjson = {\n",
282+
" 'name': 'bbox_with_radio_subclass',\n",
283+
" 'classifications': [\n",
284+
" {\n",
285+
" 'name': 'sub_radio_question',\n",
286+
" 'answer': {'name': 'first_sub_radio_answer'}\n",
287+
" }\n",
288+
" ],\n",
289+
" 'bbox': {\n",
290+
" \"top\": 214.894,\n",
291+
" \"left\": 189.215,\n",
292+
" \"height\": 264,\n",
293+
" \"width\": 240.573\n",
294+
" },\n",
295+
" 'page': 1,\n",
296+
" 'unit': \"POINTS\"\n",
297+
"}"
298+
],
299+
"cell_type": "code",
300+
"outputs": [],
301+
"execution_count": null
302+
},
303+
{
304+
"metadata": {},
305+
"source": [
306+
"############ NER with nested classifications ######## \n",
307+
"\n",
308+
"ner_with_checklist_subclass_annotation_ndjson = {\n",
309+
" 'name': 'ner_with_checklist_subclass',\n",
310+
" 'classifications':[\n",
311+
" {\n",
312+
" 'name': 'sub_checklist_question',\n",
313+
" 'answer': [{'name': 'first_sub_checklist_answer'}] \n",
314+
" }\n",
315+
" ],\n",
316+
" 'textSelections': [\n",
317+
" {\n",
318+
" \"tokenIds\": [\n",
319+
" \"<UUID>\",\n",
320+
" ],\n",
321+
" \"groupId\": \"<UUID>\",\n",
322+
" \"page\": 1,\n",
323+
" }\n",
324+
" ] \n",
325+
"}\n",
326+
" \n"
327+
],
328+
"cell_type": "code",
329+
"outputs": [],
330+
"execution_count": null
331+
},
276332
{
277333
"metadata": {},
278334
"source": [
@@ -388,7 +444,34 @@
388444
" name=\"bounding_box\"), \n",
389445
" lb.Tool(\n",
390446
" tool=lb.Tool.Type.NER, \n",
391-
" name=\"named_entity\")]\n",
447+
" name=\"named_entity\"),\n",
448+
" lb.Tool(\n",
449+
" tool=lb.Tool.Type.NER, \n",
450+
" name=\"ner_with_checklist_subclass\",\n",
451+
" classifications=[\n",
452+
" lb.Classification(\n",
453+
" class_type=lb.Classification.Type.CHECKLIST,\n",
454+
" name=\"sub_checklist_question\",\n",
455+
" options=[\n",
456+
" lb.Option(value=\"first_sub_checklist_answer\")\n",
457+
" ]\n",
458+
" )\n",
459+
" ]\n",
460+
" ),\n",
461+
" lb.Tool( \n",
462+
" tool=lb.Tool.Type.BBOX,\n",
463+
" name=\"bbox_with_radio_subclass\",\n",
464+
" classifications=[\n",
465+
" lb.Classification(\n",
466+
" class_type=lb.Classification.Type.RADIO,\n",
467+
" name=\"sub_radio_question\",\n",
468+
" options=[\n",
469+
" lb.Option(value=\"first_sub_radio_answer\")\n",
470+
" ]\n",
471+
" )\n",
472+
" ]\n",
473+
" )\n",
474+
" ]\n",
392475
")\n",
393476
"\n",
394477
"ontology = client.create_ontology(\"Document Annotation Import Demo\",\n",
@@ -487,7 +570,6 @@
487570
" # build text selections for Annotation Types\n",
488571
" document_text_selection = lb_types.DocumentTextSelection(groupId=group['id'], tokenIds=list_tokens, page=1)\n",
489572
" text_selections.append(document_text_selection)\n",
490-
" \n",
491573
" # build text selection for the NDJson annotation\n",
492574
" entities_annotations_ndjson.update(\n",
493575
" {\n",
@@ -497,23 +579,47 @@
497579
" \"tokenIds\": list_tokens, #id associated with each word in a sentence group\n",
498580
" \"page\": 1,\n",
499581
" }\n",
582+
" ]}\n",
583+
" )\n",
584+
" if group['content'] == \"T. Sasaki,* N. Yoneyama, and N. Kobayashi\":\n",
585+
" list_tokens_2 = [x['id'] for x in group['tokens']]\n",
586+
" ner_with_checklist_subclass_annotation_ndjson.update(\n",
587+
" {\n",
588+
" \"textSelections\": [\n",
589+
" {\n",
590+
" \"groupId\": group['id'], #id associated with the group of words\n",
591+
" \"tokenIds\": list_tokens_2, #id associated with each word in a sentence group\n",
592+
" \"page\": 1,\n",
593+
" }\n",
500594
" ]\n",
501595
" }\n",
502596
" )\n",
597+
" \n",
503598
"# re-write the entity annotation with text selections (annotation types)\n",
504599
"entities_annotation_document_entity = lb_types.DocumentEntity(name=\"named_entity\", \n",
505600
" textSelections = text_selections)\n",
506601
"entities_annotation = lb_types.ObjectAnnotation(name=\"named_entity\",\n",
507602
" value=entities_annotation_document_entity)\n",
603+
"\n",
508604
" \n",
509605
"print(f\"entities_annotations_ndjson={entities_annotations_ndjson}\")\n",
510606
"print(f\"entities_annotation={entities_annotation}\")\n",
607+
"print(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation_ndjson}\")\n",
511608
" "
512609
],
513610
"cell_type": "code",
514611
"outputs": [],
515612
"execution_count": null
516613
},
614+
{
615+
"metadata": {},
616+
"source": [
617+
"ner_with_checklist_subclass_annotation_ndjson"
618+
],
619+
"cell_type": "code",
620+
"outputs": [],
621+
"execution_count": null
622+
},
517623
{
518624
"metadata": {},
519625
"source": [
@@ -565,6 +671,8 @@
565671
" text_annotation_ndjson,\n",
566672
" checklist_annotation_ndjson,\n",
567673
" nested_checklist_annotation_ndjson,\n",
674+
" bbox_with_radio_subclass_annotation_ndjson,\n",
675+
" ner_with_checklist_subclass_annotation_ndjson,\n",
568676
" nested_radio_annotation_ndjson,\n",
569677
" radio_annotation_ndjson\n",
570678
" ]:\n",

0 commit comments

Comments
 (0)