|
2 | 2 | "cells": [ |
3 | 3 | { |
4 | 4 | "cell_type": "markdown", |
5 | | - "id": "dcc32476", |
| 5 | + "id": "7ede57bf", |
6 | 6 | "metadata": {}, |
7 | 7 | "source": [ |
8 | 8 | "<td>\n", |
|
12 | 12 | }, |
13 | 13 | { |
14 | 14 | "cell_type": "markdown", |
15 | | - "id": "43328417", |
| 15 | + "id": "e2c69e42", |
16 | 16 | "metadata": {}, |
17 | 17 | "source": [ |
18 | 18 | "<td>\n", |
|
28 | 28 | }, |
29 | 29 | { |
30 | 30 | "cell_type": "markdown", |
31 | | - "id": "6311ec07", |
| 31 | + "id": "cef9c510", |
32 | 32 | "metadata": {}, |
33 | 33 | "source": [ |
34 | 34 | "# PDF Annotation Import\n", |
|
38 | 38 | }, |
39 | 39 | { |
40 | 40 | "cell_type": "markdown", |
41 | | - "id": "e111a6e2", |
| 41 | + "id": "8c877b9c", |
42 | 42 | "metadata": {}, |
43 | 43 | "source": [ |
44 | 44 | "* For information on what types of annotations are supported per data type, refer to this documentation:\n", |
|
47 | 47 | }, |
48 | 48 | { |
49 | 49 | "cell_type": "markdown", |
50 | | - "id": "c6a696c7", |
| 50 | + "id": "e7e5d296", |
51 | 51 | "metadata": {}, |
52 | 52 | "source": [ |
53 | 53 | "* Notes:\n", |
|
57 | 57 | { |
58 | 58 | "cell_type": "code", |
59 | 59 | "execution_count": 1, |
60 | | - "id": "098fd6c8", |
| 60 | + "id": "8d8d554f", |
61 | 61 | "metadata": {}, |
62 | 62 | "outputs": [], |
63 | 63 | "source": [ |
64 | | - "# !pip install -q 'labelbox[data]'\n", |
65 | | - "!pip install -q pip install https://github.com/Labelbox/labelbox-python/archive/refs/heads/jtso/al-3330.zip\n", |
66 | | - "# !pip install -q -e ../../." |
| 64 | + "!pip install -q 'labelbox[data]'" |
67 | 65 | ] |
68 | 66 | }, |
69 | 67 | { |
70 | 68 | "cell_type": "markdown", |
71 | | - "id": "5f5f479e", |
| 69 | + "id": "573525c5", |
72 | 70 | "metadata": {}, |
73 | 71 | "source": [ |
74 | 72 | "# Imports" |
|
77 | 75 | { |
78 | 76 | "cell_type": "code", |
79 | 77 | "execution_count": 2, |
80 | | - "id": "51f77acd", |
| 78 | + "id": "e3522d4b", |
81 | 79 | "metadata": {}, |
82 | 80 | "outputs": [], |
83 | 81 | "source": [ |
|
96 | 94 | }, |
97 | 95 | { |
98 | 96 | "cell_type": "markdown", |
99 | | - "id": "20834a5f", |
| 97 | + "id": "e3036435", |
100 | 98 | "metadata": {}, |
101 | 99 | "source": [ |
102 | 100 | "# API Key and Client\n", |
|
105 | 103 | }, |
106 | 104 | { |
107 | 105 | "cell_type": "code", |
108 | | - "execution_count": 3, |
109 | | - "id": "7e706b58", |
| 106 | + "execution_count": 56, |
| 107 | + "id": "6ffec272", |
110 | 108 | "metadata": {}, |
111 | 109 | "outputs": [], |
112 | 110 | "source": [ |
113 | 111 | "# Add your api key\n", |
114 | | - "API_KEY = \"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2NjOWZtbXc0aGNkMDczOHFpeWM2YW54Iiwib3JnYW5pemF0aW9uSWQiOiJja2N6NmJ1YnVkeWZpMDg1NW8xZHQxZzlzIiwiYXBpS2V5SWQiOiJja2V2cDF2enAwdDg0MDc1N3I2ZWZldGgzIiwiaWF0IjoxNTk5Njc0NzY0LCJleHAiOjIyMzA4MjY3NjR9.iyqPpEWNpfcjcTid5WVkXLi51g22e_l3FrK-DlFJ2mM\"\n", |
| 112 | + "API_KEY = \"YOUR API KEY\"\n", |
115 | 113 | "client = Client(api_key=API_KEY)" |
116 | 114 | ] |
117 | 115 | }, |
118 | 116 | { |
119 | 117 | "cell_type": "markdown", |
120 | | - "id": "f3d70bcf", |
| 118 | + "id": "70c4d820", |
121 | 119 | "metadata": {}, |
122 | 120 | "source": [ |
123 | 121 | "---- \n", |
|
129 | 127 | }, |
130 | 128 | { |
131 | 129 | "cell_type": "markdown", |
132 | | - "id": "9e88fd12", |
| 130 | + "id": "5d7fc082", |
133 | 131 | "metadata": {}, |
134 | 132 | "source": [ |
135 | 133 | "### Project setup" |
136 | 134 | ] |
137 | 135 | }, |
138 | 136 | { |
139 | 137 | "cell_type": "markdown", |
140 | | - "id": "b6b26171", |
| 138 | + "id": "d0bc3c39", |
141 | 139 | "metadata": {}, |
142 | 140 | "source": [ |
143 | 141 | "First, we create an ontology with all the possible tools and classifications supported for PDF. The official list of supported annotations to import can be found here:\n", |
|
148 | 146 | { |
149 | 147 | "cell_type": "code", |
150 | 148 | "execution_count": 4, |
151 | | - "id": "00b4884a", |
| 149 | + "id": "f93aebd8", |
152 | 150 | "metadata": {}, |
153 | 151 | "outputs": [], |
154 | 152 | "source": [ |
|
184 | 182 | { |
185 | 183 | "cell_type": "code", |
186 | 184 | "execution_count": 5, |
187 | | - "id": "e6ce5b06", |
| 185 | + "id": "e290fd6e", |
188 | 186 | "metadata": {}, |
189 | 187 | "outputs": [ |
190 | 188 | { |
|
205 | 203 | { |
206 | 204 | "cell_type": "code", |
207 | 205 | "execution_count": 6, |
208 | | - "id": "c0bbf502", |
| 206 | + "id": "27961645", |
209 | 207 | "metadata": {}, |
210 | 208 | "outputs": [], |
211 | 209 | "source": [ |
|
228 | 226 | }, |
229 | 227 | { |
230 | 228 | "cell_type": "markdown", |
231 | | - "id": "eb7bb40c", |
| 229 | + "id": "db26d55d", |
232 | 230 | "metadata": {}, |
233 | 231 | "source": [ |
234 | 232 | "### Create Label using Annotation Type Objects\n", |
|
237 | 235 | }, |
238 | 236 | { |
239 | 237 | "cell_type": "markdown", |
240 | | - "id": "1ee26ad9", |
| 238 | + "id": "f409d3cc", |
241 | 239 | "metadata": {}, |
242 | 240 | "source": [ |
243 | 241 | "### Object Annotations" |
|
246 | 244 | { |
247 | 245 | "cell_type": "code", |
248 | 246 | "execution_count": 38, |
249 | | - "id": "a877a9d8", |
| 247 | + "id": "5cbe5b5a", |
250 | 248 | "metadata": {}, |
251 | 249 | "outputs": [], |
252 | 250 | "source": [ |
|
263 | 261 | { |
264 | 262 | "cell_type": "code", |
265 | 263 | "execution_count": 39, |
266 | | - "id": "e0c63922", |
| 264 | + "id": "4896ea42", |
267 | 265 | "metadata": {}, |
268 | 266 | "outputs": [], |
269 | 267 | "source": [ |
|
296 | 294 | }, |
297 | 295 | { |
298 | 296 | "cell_type": "markdown", |
299 | | - "id": "d5abfce2", |
| 297 | + "id": "0f79ecc9", |
300 | 298 | "metadata": {}, |
301 | 299 | "source": [ |
302 | 300 | "### Create a Label object with all of our annotations" |
|
305 | 303 | { |
306 | 304 | "cell_type": "code", |
307 | 305 | "execution_count": 51, |
308 | | - "id": "616132d3", |
| 306 | + "id": "64ac1f74", |
309 | 307 | "metadata": {}, |
310 | 308 | "outputs": [ |
311 | 309 | { |
|
342 | 340 | }, |
343 | 341 | { |
344 | 342 | "cell_type": "markdown", |
345 | | - "id": "ef0c28cc", |
| 343 | + "id": "c7988155", |
346 | 344 | "metadata": {}, |
347 | 345 | "source": [ |
348 | 346 | "### Model Assisted Labeling " |
349 | 347 | ] |
350 | 348 | }, |
351 | 349 | { |
352 | 350 | "cell_type": "markdown", |
353 | | - "id": "7364844b", |
| 351 | + "id": "b33ef622", |
354 | 352 | "metadata": {}, |
355 | 353 | "source": [ |
356 | 354 | "To do model-assisted labeling, we need to convert a Label object into an NDJSON. \n", |
|
364 | 362 | { |
365 | 363 | "cell_type": "code", |
366 | 364 | "execution_count": 52, |
367 | | - "id": "b77d5a9e", |
368 | | - "metadata": {}, |
| 365 | + "id": "8794d8aa", |
| 366 | + "metadata": { |
| 367 | + "scrolled": true |
| 368 | + }, |
369 | 369 | "outputs": [ |
370 | 370 | { |
371 | 371 | "data": { |
|
406 | 406 | { |
407 | 407 | "cell_type": "code", |
408 | 408 | "execution_count": 53, |
409 | | - "id": "ab9b89fd", |
| 409 | + "id": "787524ac", |
410 | 410 | "metadata": {}, |
411 | 411 | "outputs": [], |
412 | 412 | "source": [ |
|
420 | 420 | }, |
421 | 421 | { |
422 | 422 | "cell_type": "code", |
423 | | - "execution_count": 54, |
424 | | - "id": "966c5d8c", |
| 423 | + "execution_count": 57, |
| 424 | + "id": "eb5116f9", |
425 | 425 | "metadata": {}, |
426 | | - "outputs": [ |
427 | | - { |
428 | | - "name": "stdout", |
429 | | - "output_type": "stream", |
430 | | - "text": [ |
431 | | - "Errors: [{'uuid': 'fc1913c6-b735-4dea-bd25-c18152a4715f', 'dataRow': {'id': 'cl76eelhd0fm3071n9ropffkr'}, 'status': 'FAILURE', 'errors': [{'name': 'InvalidAnnotation', 'message': 'Document annotation is missing page or unit fields.', 'additionalInfo': None}]}, {'uuid': '9d7b2e57-d68f-4388-867a-af2a9b233719', 'dataRow': {'id': 'cl76eelhd0fm3071n9ropffkr'}, 'status': 'FAILURE', 'errors': [{'name': 'InvalidAnnotation', 'message': 'Document annotation is missing page or unit fields.', 'additionalInfo': None}]}, {'uuid': 'ad60897f-ea1a-47de-b923-459339764921', 'dataRow': {'id': 'cl76eelhd0fm3071n9ropffkr'}, 'status': 'FAILURE', 'errors': [{'name': 'InvalidAnnotation', 'message': 'Document annotation is missing page or unit fields.', 'additionalInfo': None}]}]\n" |
432 | | - ] |
433 | | - } |
434 | | - ], |
| 426 | + "outputs": [], |
435 | 427 | "source": [ |
436 | 428 | "# Errors will appear for each annotation that failed.\n", |
437 | 429 | "# Empty list means that there were no errors\n", |
438 | 430 | "# This will provide information only after the upload_job is complete, so we do not need to worry about having to rerun\n", |
439 | 431 | "print(\"Errors:\", upload_job.errors)" |
440 | 432 | ] |
441 | 433 | }, |
442 | | - { |
443 | | - "cell_type": "code", |
444 | | - "execution_count": 55, |
445 | | - "id": "dc0f963f", |
446 | | - "metadata": {}, |
447 | | - "outputs": [ |
448 | | - { |
449 | | - "data": { |
450 | | - "text/plain": [ |
451 | | - "{'tools': [{'tool': 'rectangle',\n", |
452 | | - " 'name': 'box',\n", |
453 | | - " 'required': False,\n", |
454 | | - " 'color': '#ff0000',\n", |
455 | | - " 'classifications': [],\n", |
456 | | - " 'schemaNodeId': None,\n", |
457 | | - " 'featureSchemaId': None}],\n", |
458 | | - " 'classifications': [{'type': 'text',\n", |
459 | | - " 'instructions': 'text',\n", |
460 | | - " 'name': 'text',\n", |
461 | | - " 'required': False,\n", |
462 | | - " 'options': [],\n", |
463 | | - " 'schemaNodeId': None,\n", |
464 | | - " 'featureSchemaId': None,\n", |
465 | | - " 'scope': 'global'},\n", |
466 | | - " {'type': 'checklist',\n", |
467 | | - " 'instructions': 'checklist',\n", |
468 | | - " 'name': 'checklist',\n", |
469 | | - " 'required': False,\n", |
470 | | - " 'options': [{'schemaNodeId': None,\n", |
471 | | - " 'featureSchemaId': None,\n", |
472 | | - " 'label': 'first_checklist_answer',\n", |
473 | | - " 'value': 'first_checklist_answer',\n", |
474 | | - " 'options': []},\n", |
475 | | - " {'schemaNodeId': None,\n", |
476 | | - " 'featureSchemaId': None,\n", |
477 | | - " 'label': 'second_checklist_answer',\n", |
478 | | - " 'value': 'second_checklist_answer',\n", |
479 | | - " 'options': []}],\n", |
480 | | - " 'schemaNodeId': None,\n", |
481 | | - " 'featureSchemaId': None,\n", |
482 | | - " 'scope': 'global'},\n", |
483 | | - " {'type': 'radio',\n", |
484 | | - " 'instructions': 'radio',\n", |
485 | | - " 'name': 'radio',\n", |
486 | | - " 'required': False,\n", |
487 | | - " 'options': [{'schemaNodeId': None,\n", |
488 | | - " 'featureSchemaId': None,\n", |
489 | | - " 'label': 'first_radio_answer',\n", |
490 | | - " 'value': 'first_radio_answer',\n", |
491 | | - " 'options': []},\n", |
492 | | - " {'schemaNodeId': None,\n", |
493 | | - " 'featureSchemaId': None,\n", |
494 | | - " 'label': 'second_radio_answer',\n", |
495 | | - " 'value': 'second_radio_answer',\n", |
496 | | - " 'options': []}],\n", |
497 | | - " 'schemaNodeId': None,\n", |
498 | | - " 'featureSchemaId': None,\n", |
499 | | - " 'scope': 'global'}]}" |
500 | | - ] |
501 | | - }, |
502 | | - "execution_count": 55, |
503 | | - "metadata": {}, |
504 | | - "output_type": "execute_result" |
505 | | - } |
506 | | - ], |
507 | | - "source": [ |
508 | | - "ontology_builder.asdict()" |
509 | | - ] |
510 | | - }, |
511 | 434 | { |
512 | 435 | "cell_type": "code", |
513 | 436 | "execution_count": null, |
514 | | - "id": "223e32f9", |
| 437 | + "id": "ba9dc45a", |
515 | 438 | "metadata": {}, |
516 | 439 | "outputs": [], |
517 | 440 | "source": [] |
|
0 commit comments