|
78 | 78 | "!pip install -q 'labelbox[data]'" |
79 | 79 | ], |
80 | 80 | "cell_type": "code", |
81 | | - "outputs": [ |
82 | | - { |
83 | | - "name": "stdout", |
84 | | - "output_type": "stream", |
85 | | - "text": [ |
86 | | - "\u001b[K |\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 185 kB 29.6 MB/s \n", |
87 | | - "\u001b[K |\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 7.8 MB 53.9 MB/s \n", |
88 | | - "\u001b[?25h Building wheel for pygeotile (setup.py) ... \u001b[?25l\u001b[?25hdone\n" |
89 | | - ] |
90 | | - } |
91 | | - ], |
| 81 | + "outputs": [], |
92 | 82 | "execution_count": null |
93 | 83 | }, |
94 | 84 | { |
|
102 | 92 | "metadata": {}, |
103 | 93 | "source": [ |
104 | 94 | "import labelbox as lb\n", |
105 | | - "import uuid" |
| 95 | + "import uuid\n", |
| 96 | + "import labelbox.types as lb_types" |
106 | 97 | ], |
107 | 98 | "cell_type": "code", |
108 | 99 | "outputs": [], |
|
120 | 111 | "metadata": {}, |
121 | 112 | "source": [ |
122 | 113 | "# Add your api key\n", |
123 | | - "API_KEY = None\n", |
| 114 | + "API_KEY = \"\"\n", |
124 | 115 | "client = lb.Client(api_key=API_KEY)" |
125 | 116 | ], |
126 | 117 | "cell_type": "code", |
|
146 | 137 | "source": [ |
147 | 138 | "# message based classifications\n", |
148 | 139 | "\n", |
149 | | - "ner_annotation = { \n", |
| 140 | + "ner_annotation_ndjson = { \n", |
150 | 141 | " \"name\": \"ner\",\n", |
151 | 142 | " \"location\": { \n", |
152 | 143 | " \"start\": 0, \n", |
153 | 144 | " \"end\": 8 \n", |
154 | 145 | " },\n", |
155 | 146 | " \"messageId\": \"4\"\n", |
156 | | - " }\n", |
| 147 | + " }" |
| 148 | + ], |
| 149 | + "cell_type": "code", |
| 150 | + "outputs": [], |
| 151 | + "execution_count": null |
| 152 | + }, |
| 153 | + { |
| 154 | + "metadata": {}, |
| 155 | + "source": [ |
| 156 | + "##### Classification free text #####\n", |
157 | 157 | "\n", |
158 | | - "text_annotation = {\n", |
| 158 | + "# Only supported with NDJSON\n", |
| 159 | + "text_annotation_ndjson = {\n", |
159 | 160 | " 'name': 'text_convo',\n", |
160 | 161 | " 'answer': 'the answer to the text questions right here',\n", |
161 | 162 | " 'messageId': \"0\"\n", |
162 | | - "}\n", |
163 | | - "\n", |
| 163 | + "}" |
| 164 | + ], |
| 165 | + "cell_type": "code", |
| 166 | + "outputs": [], |
| 167 | + "execution_count": null |
| 168 | + }, |
| 169 | + { |
| 170 | + "metadata": {}, |
| 171 | + "source": [ |
| 172 | + "##### Checklist Classification ####### \n", |
164 | 173 | "\n", |
165 | | - "checklist_annotation = {\n", |
| 174 | + "# Only supported with NDJSON\n", |
| 175 | + "checklist_annotation_ndjson = {\n", |
166 | 176 | " 'name': 'checklist_convo',\n", |
167 | 177 | " 'answers': [\n", |
168 | 178 | " {'name': 'first_checklist_answer'},\n", |
169 | 179 | " {'name': 'second_checklist_answer'}\n", |
170 | 180 | " ],\n", |
171 | 181 | " 'messageId': '2'\n", |
172 | | - "}\n", |
| 182 | + "}" |
| 183 | + ], |
| 184 | + "cell_type": "code", |
| 185 | + "outputs": [], |
| 186 | + "execution_count": null |
| 187 | + }, |
| 188 | + { |
| 189 | + "metadata": {}, |
| 190 | + "source": [ |
| 191 | + "######## Radio Classification ######\n", |
173 | 192 | "\n", |
174 | | - "radio_annotation = {\n", |
| 193 | + "# Only supported with NDJSON\n", |
| 194 | + "radio_annotation_ndjson = {\n", |
175 | 195 | " 'name': 'radio_convo',\n", |
176 | 196 | " 'answer': {\n", |
177 | 197 | " 'name': 'first_radio_answer'\n", |
|
201 | 221 | "metadata": {}, |
202 | 222 | "source": [ |
203 | 223 | "# Create one Labelbox dataset\n", |
204 | | - "dataset = client.create_dataset(name=\"conversational_annotation_import_demo_dataset\")\n", |
| 224 | + "\n", |
| 225 | + "global_key = \"conversation-1.json\"\n", |
205 | 226 | "\n", |
206 | 227 | "asset = {\n", |
207 | 228 | " \"row_data\": \"https://storage.googleapis.com/labelbox-developer-testing-assets/conversational_text/1000-conversations/conversation-1.json\",\n", |
208 | | - " \"global_key\": str(uuid.uuid1())\n", |
| 229 | + " \"global_key\": global_key\n", |
209 | 230 | "}\n", |
210 | 231 | "\n", |
211 | | - "\n", |
212 | | - "data_row = dataset.create_data_row(asset)\n", |
213 | | - "print(data_row)\n" |
| 232 | + "dataset = client.create_dataset(name=\"conversational_annotation_import_demo_dataset\")\n", |
| 233 | + "task = dataset.create_data_rows([asset])\n", |
| 234 | + "task.wait_till_done()\n", |
| 235 | + "print(\"Errors:\", task.errors)\n", |
| 236 | + "print(\"Failed data rows: \", task.failed_data_rows)" |
214 | 237 | ], |
215 | 238 | "cell_type": "code", |
216 | | - "outputs": [ |
217 | | - { |
218 | | - "name": "stdout", |
219 | | - "output_type": "stream", |
220 | | - "text": [ |
221 | | - "<DataRow {\n", |
222 | | - " \"created_at\": \"2022-12-23 20:18:48+00:00\",\n", |
223 | | - " \"external_id\": null,\n", |
224 | | - " \"global_key\": \"0206acac-82ff-11ed-a415-0242ac1c000c\",\n", |
225 | | - " \"media_attributes\": {},\n", |
226 | | - " \"metadata\": [],\n", |
227 | | - " \"metadata_fields\": [],\n", |
228 | | - " \"row_data\": \"https://storage.googleapis.com/labelbox-developer-testing-assets/conversational_text/1000-conversations/conversation-1.json\",\n", |
229 | | - " \"uid\": \"clc0ygvde029307yn96gv2byu\",\n", |
230 | | - " \"updated_at\": \"2022-12-23 20:18:48+00:00\"\n", |
231 | | - "}>\n" |
232 | | - ] |
233 | | - } |
234 | | - ], |
| 239 | + "outputs": [], |
235 | 240 | "execution_count": null |
236 | 241 | }, |
237 | 242 | { |
238 | 243 | "metadata": {}, |
239 | 244 | "source": [ |
240 | 245 | "## Step 2: Create/select an ontology\n", |
241 | 246 | "\n", |
242 | | - "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name`/`instructions` fields in your annotations to ensure the correct feature schemas are matched.\n", |
| 247 | + "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name` fields in your annotations to ensure the correct feature schemas are matched.\n", |
243 | 248 | "\n", |
244 | | - "For example, when we create the bounding box annotation [above](https://colab.research.google.com/drive/1rFv-VvHUBbzFYamz6nSMRJz1mEg6Ukqq#scrollTo=3umnTd-MfI0o&line=1&uniqifier=1), we provided the `name` as `text_convo`. Now, when we setup our ontology, we must ensure that the name of my bounding box tool is also `checklist_convo`. The same alignment must hold true for the other tools and classifications we create in our ontology." |
| 249 | + "For example, when we created the text annotation, we provided the `name` as `text_convo`. Now, when we set up our ontology, we must ensure that the name of the corresponding classification is also `text_convo`. The same alignment must hold true for the other tools and classifications we create in our ontology." |
245 | 250 | ], |
246 | 251 | "cell_type": "markdown" |
247 | 252 | }, |
|
254 | 259 | " tool=lb.Tool.Type.NER, \n", |
255 | 260 | " name=\"ner\")], \n", |
256 | 261 | " classifications=[ \n", |
257 | | - " lb.Classification( # Text classification given the name \"text\"\n", |
| 262 | + " lb.Classification( \n", |
258 | 263 | " class_type=lb.Classification.Type.TEXT,\n", |
259 | 264 | " scope=lb.Classification.Scope.INDEX, \n", |
260 | 265 | " instructions=\"text_convo\"), \n", |
261 | | - " lb.Classification( # Checklist classification given the name \"text\" with two options: \"first_checklist_answer\" and \"second_checklist_answer\"\n", |
| 266 | + " lb.Classification( \n", |
262 | 267 | " class_type=lb.Classification.Type.CHECKLIST, \n", |
263 | 268 | " scope=lb.Classification.Scope.INDEX, \n", |
264 | 269 | " instructions=\"checklist_convo\", \n", |
|
267 | 272 | " lb.Option(value=\"second_checklist_answer\") \n", |
268 | 273 | " ]\n", |
269 | 274 | " ), \n", |
270 | | - " lb.Classification( # Radio classification given the name \"text\" with two options: \"first_radio_answer\" and \"second_radio_answer\"\n", |
| 275 | + " lb.Classification( \n", |
271 | 276 | " class_type=lb.Classification.Type.RADIO, \n", |
272 | 277 | " instructions=\"radio_convo\", \n", |
273 | 278 | " scope=lb.Classification.Scope.INDEX, \n", |
|
277 | 282 | " ]\n", |
278 | 283 | " )\n", |
279 | 284 | " ]\n", |
280 | | - ")" |
| 285 | + ")\n", |
| 286 | + "\n", |
| 287 | + "ontology = client.create_ontology(\"Ontology Conversation Annotations\", ontology_builder.asdict(), media_type=lb.MediaType.Conversational)" |
281 | 288 | ], |
282 | 289 | "cell_type": "code", |
283 | 290 | "outputs": [], |
|
296 | 303 | "metadata": {}, |
297 | 304 | "source": [ |
298 | 305 | "# Create Labelbox project\n", |
299 | | - "project = client.create_project(name=\"conversational_mal_project\", \n", |
| 306 | + "project = client.create_project(name=\"conversational_project\", \n", |
300 | 307 | " media_type=lb.MediaType.Conversational)\n", |
301 | 308 | "\n", |
302 | | - "# Setup your ontology / labeling editor\n", |
303 | | - "editor = next(client.get_labeling_frontends(where=lb.LabelingFrontend.name == \"Editor\")) # Unless using a custom editor, do not modify this\n", |
304 | | - "\n", |
305 | | - "project.setup(editor, ontology_builder.asdict()) # Connect your ontology and editor to your project\n" |
| 309 | + "# Setup your ontology \n", |
| 310 | + "project.setup_editor(ontology) # Connect your ontology and editor to your project" |
306 | 311 | ], |
307 | 312 | "cell_type": "code", |
308 | 313 | "outputs": [ |
|
331 | 336 | "# Create a batch to send to your MAL project\n", |
332 | 337 | "batch = project.create_batch(\n", |
333 | 338 | " \"first-batch-convo-demo\", # Each batch in a project must have a unique name\n", |
334 | | - " [data_row.uid], # Paginated collection of data row objects\n", |
335 | | - " 5 # priority between 1(Highest) - 5(lowest)\n", |
| 339 | + " global_keys=[global_key], # List of global keys identifying the data rows to add to the batch\n", |
| 340 | + " priority=5 # priority between 1(Highest) - 5(lowest)\n", |
336 | 341 | ")\n", |
337 | 342 | "\n", |
338 | 343 | "print(\"Batch: \", batch)" |
|
378 | 383 | "metadata": {}, |
379 | 384 | "source": [ |
380 | 385 | "label_ndjson = []\n", |
381 | | - "for annotations in [ner_annotation,\n", |
382 | | - " text_annotation,\n", |
383 | | - " checklist_annotation,\n", |
384 | | - " radio_annotation]:\n", |
| 386 | + "for annotations in [ner_annotation_ndjson,\n", |
| 387 | + " text_annotation_ndjson,\n", |
| 388 | + " checklist_annotation_ndjson,\n", |
| 389 | + " radio_annotation_ndjson]:\n", |
385 | 390 | " annotations.update({\n", |
386 | 391 | " 'dataRow': {\n", |
387 | | - " 'id': data_row.uid\n", |
| 392 | + " 'globalKey': global_key\n", |
388 | 393 | " }\n", |
389 | 394 | " })\n", |
390 | 395 | " label_ndjson.append(annotations)" |
|
393 | 398 | "outputs": [], |
394 | 399 | "execution_count": null |
395 | 400 | }, |
396 | | - { |
397 | | - "metadata": {}, |
398 | | - "source": [ |
399 | | - "label_ndjson" |
400 | | - ], |
401 | | - "cell_type": "code", |
402 | | - "outputs": [ |
403 | | - { |
404 | | - "data": { |
405 | | - "text/plain": [ |
406 | | - "[{'name': 'ner',\n", |
407 | | - " 'location': {'start': 0, 'end': 8},\n", |
408 | | - " 'messageId': '4',\n", |
409 | | - " 'dataRow': {'id': 'clc0ygvde029307yn96gv2byu'}},\n", |
410 | | - " {'name': 'text_convo',\n", |
411 | | - " 'answer': 'the answer to the text questions right here',\n", |
412 | | - " 'messageId': '0',\n", |
413 | | - " 'dataRow': {'id': 'clc0ygvde029307yn96gv2byu'}},\n", |
414 | | - " {'name': 'checklist_convo',\n", |
415 | | - " 'answers': [{'name': 'first_checklist_answer'},\n", |
416 | | - " {'name': 'second_checklist_answer'}],\n", |
417 | | - " 'messageId': '2',\n", |
418 | | - " 'dataRow': {'id': 'clc0ygvde029307yn96gv2byu'}},\n", |
419 | | - " {'name': 'radio_convo',\n", |
420 | | - " 'answer': {'name': 'first_radio_answer'},\n", |
421 | | - " 'messageId': '0',\n", |
422 | | - " 'dataRow': {'id': 'clc0ygvde029307yn96gv2byu'}}]" |
423 | | - ] |
424 | | - }, |
425 | | - "execution_count": 13, |
426 | | - "metadata": {}, |
427 | | - "output_type": "execute_result" |
428 | | - } |
429 | | - ], |
430 | | - "execution_count": null |
431 | | - }, |
432 | 401 | { |
433 | 402 | "metadata": {}, |
434 | 403 | "source": [ |
|
456 | 425 | "\n", |
457 | 426 | "upload_job.wait_until_done();\n", |
458 | 427 | "print(\"Errors:\", upload_job.errors)\n", |
459 | | - "print(\" \")" |
| 428 | + "print(\"Status of uploads: \", upload_job.statuses)" |
460 | 429 | ], |
461 | 430 | "cell_type": "code", |
462 | 431 | "outputs": [ |
|
471 | 440 | ], |
472 | 441 | "execution_count": null |
473 | 442 | }, |
474 | | - { |
475 | | - "metadata": {}, |
476 | | - "source": [ |
477 | | - "annotations" |
478 | | - ], |
479 | | - "cell_type": "code", |
480 | | - "outputs": [ |
481 | | - { |
482 | | - "data": { |
483 | | - "text/plain": [ |
484 | | - "[{'name': 'ner', 'location': {'start': 0, 'end': 8}, 'messageId': '4'},\n", |
485 | | - " {'name': 'text_convo',\n", |
486 | | - " 'answer': 'the answer to the text questions right here',\n", |
487 | | - " 'uuid': '0ae2b42b-0e01-4bd6-8e4f-5ebfe6402a05',\n", |
488 | | - " 'dataRow': {'id': 'clc0okhr74aq607yb6fv83crl'},\n", |
489 | | - " 'messageId': '0'},\n", |
490 | | - " {'name': 'checklist_convo',\n", |
491 | | - " 'uuid': '8a382c09-da4c-455f-80ee-16fb05165e4a',\n", |
492 | | - " 'answers': [{'name': 'first_checklist_answer'},\n", |
493 | | - " {'name': 'second_checklist_answer'}],\n", |
494 | | - " 'dataRow': {'id': 'clc0okhr74aq607yb6fv83crl'},\n", |
495 | | - " 'messageId': '2'},\n", |
496 | | - " {'name': 'radio_convo',\n", |
497 | | - " 'uuid': '515e4f05-6cf1-4e8c-b183-d633c49f5106',\n", |
498 | | - " 'dataRow': {'id': 'clc0okhr74aq607yb6fv83crl'},\n", |
499 | | - " 'answer': {'name': 'first_radio_answer'},\n", |
500 | | - " 'messageId': '0'}]" |
501 | | - ] |
502 | | - }, |
503 | | - "execution_count": 45, |
504 | | - "metadata": {}, |
505 | | - "output_type": "execute_result" |
506 | | - } |
507 | | - ], |
508 | | - "execution_count": null |
509 | | - }, |
510 | 443 | { |
511 | 444 | "metadata": {}, |
512 | 445 | "source": [ |
|
521 | 454 | "upload_job = lb.LabelImport.create_from_objects(\n", |
522 | 455 | " client = client, \n", |
523 | 456 | " project_id = project.uid, \n", |
524 | | - " name=\"label_geo_import_job\"+str(uuid.uuid4()), \n", |
| 457 | + " name=\"label_import_job\"+str(uuid.uuid4()), \n", |
525 | 458 | " # use label_ndjson if labels were created using python annotation tools\n", |
526 | 459 | " labels=label_ndjson)\n", |
527 | 460 | "\n", |
528 | 461 | "upload_job.wait_until_done();\n", |
529 | | - "print(\"Errors:\", upload_job.errors)" |
| 462 | + "print(\"Errors:\", upload_job.errors)\n", |
| 463 | + "print(\"Status of uploads: \", upload_job.statuses)" |
530 | 464 | ], |
531 | 465 | "cell_type": "code", |
532 | 466 | "outputs": [ |
|
0 commit comments