224224 {
225225 "metadata" : {},
226226 "source" : [
227- " ### Create\n " ,
228- " * Create a single data row with and without metadata "
227+ " ## Create\n " ,
228+ " We recommend the following methods to create data rows : `dataset.upsert_data_rows()` and `dataset.create_data_rows()`. "
229229 ],
230230 "cell_type" : " markdown"
231231 },
232232 {
233233 "metadata" : {},
234234 "source" : [
235- " dataset = client.create_dataset(name=\" data_rows_demo_dataset\" )\n " ,
236- " \n " ,
237- " # It is recommended that you add global keys to your data rows.\n " ,
238- " dataset.create_data_row(row_data=\" https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0002.jpeg\" ,\n " ,
239- " global_key=str(uuid.uuid4()))\n " ,
240- " \n " ,
241- " # You can also upload metadata along with your data row\n " ,
242- " mdo = client.get_data_row_metadata_ontology()\n " ,
243- " dataset.create_data_row(row_data=\" https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0003.jpeg\" ,\n " ,
244- " global_key=str(uuid.uuid4()),\n " ,
245- " metadata_fields=[\n " ,
246- " lb.DataRowMetadataField(\n " ,
247- " schema_id=mdo.reserved_by_name[\" tag\" ].uid, # specify the schema id\n " ,
248- " value=\" tag_string\" , # typed inputs\n " ,
249- " ),\n " ,
250- " ],\n " ,
251- " )"
235+ " ### Create data rows via `dataset.upsert_data_rows()`"
252236 ],
253- "cell_type" : " code" ,
254- "outputs" : [],
255- "execution_count" : null
237+ "cell_type" : " markdown"
256238 },
257239 {
258240 "metadata" : {},
259241 "source" : [
260- " ### [Recommended] Bulk create data rows (This is much faster than creating individual data rows)"
242+ " # Create a dataset\n " ,
243+ " dataset = client.create_dataset(name=\" data_rows_demo_dataset_6\" )\n " ,
244+ " # You can also upload metadata along with your data row\n " ,
245+ " mdo = client.get_data_row_metadata_ontology()"
261246 ],
262- "cell_type" : " markdown"
247+ "cell_type" : " code" ,
248+ "outputs" : [],
249+ "execution_count" : null
263250 },
264251 {
265252 "metadata" : {},
266253 "source" : [
267- " # Create a dataset\n " ,
268- " dataset = client.create_dataset(name=\" data_rows_demo_dataset_2\" )\n " ,
269- " \n " ,
270254 " uploads = []\n " ,
271255 " # Generate data rows\n " ,
272- " for i in range(1,9 ):\n " ,
256+ " for i in range(1,8 ):\n " ,
273257 " uploads.append({\n " ,
274258 " \" row_data\" : f\" https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\" ,\n " ,
275259 " \" global_key\" : \" TEST-ID-%id\" % uuid.uuid1(),\n " ,
279263 " schema_id=mdo.reserved_by_name[\" tag\" ].uid, # specify the schema id\n " ,
280264 " value=\" tag_string\" , # typed inputs\n " ,
281265 " ),\n " ,
266+ " ],\n " ,
267+ " \" attachments\" : [\n " ,
268+ " {\n " ,
269+ " \" type\" : \" IMAGE_OVERLAY\" ,\n " ,
270+ " \" value\" : \" https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\"\n " ,
271+ " },\n " ,
272+ " {\n " ,
273+ " \" type\" : \" RAW_TEXT\" ,\n " ,
274+ " \" value\" : \" IOWA, Zone 2232, June 2022 [Text string]\"\n " ,
275+ " },\n " ,
276+ " {\n " ,
277+ " \" type\" : \" TEXT_URL\" ,\n " ,
278+ " \" value\" : \" https://storage.googleapis.com/labelbox-sample-datasets/Docs/text_attachment.txt\"\n " ,
279+ " },\n " ,
280+ " {\n " ,
281+ " \" type\" : \" IMAGE\" ,\n " ,
282+ " \" value\" : \" https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\"\n " ,
283+ " },\n " ,
284+ " {\n " ,
285+ " \" type\" : \" VIDEO\" ,\n " ,
286+ " \" value\" : \" https://storage.googleapis.com/labelbox-sample-datasets/Docs/drone_video.mp4\"\n " ,
287+ " },\n " ,
288+ " {\n " ,
289+ " \" type\" : \" HTML\" ,\n " ,
290+ " \" value\" : \" https://storage.googleapis.com/labelbox-sample-datasets/Docs/windy.html\"\n " ,
291+ " },\n " ,
292+ " {\n " ,
293+ " \" type\" : \" PDF_URL\" ,\n " ,
294+ " \" value\" : \" https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n " ,
295+ " }\n " ,
282296 " ]\n " ,
283297 " })\n " ,
284298 " \n " ,
285- " task1 = dataset.create_data_rows (uploads)\n " ,
299+ " task1 = dataset.upsert_data_rows (uploads)\n " ,
286300 " task1.wait_till_done()\n " ,
287301 " print(\" ERRORS: \" , task1.errors)\n " ,
288302 " print(\" RESULTS:\" , task1.result)"
294308 {
295309 "metadata" : {},
296310 "source" : [
297- " ### Create data rows with attachments "
311+ " Create data rows from data in your local path "
298312 ],
299313 "cell_type" : " markdown"
300314 },
301315 {
302316 "metadata" : {},
303317 "source" : [
304- " task2 = dataset.create_data_rows([{\n " ,
318+ " from PIL import Image\n " ,
319+ " \n " ,
320+ " # Create dummy empty jpeg file\n " ,
321+ " width = 400\n " ,
322+ " height = 300\n " ,
323+ " color = (255, 255, 255) # White color\n " ,
324+ " image = Image.new(\" RGB\" , (width, height), color)\n " ,
325+ " \n " ,
326+ " # Save the image as a JPEG file\n " ,
327+ " image.save(\" dummy.jpg\" )\n " ,
328+ " \n " ,
329+ " local_data_path = \" dummy.jpg\"\n " ,
330+ " \n " ,
331+ " data = {\n " ,
332+ " \" row_data\" : local_data_path,\n " ,
333+ " \" global_key\" : str(uuid.uuid4())\n " ,
334+ " }\n " ,
335+ " \n " ,
336+ " task3 = dataset.upsert_data_rows([data])\n " ,
337+ " task3.wait_till_done()\n " ,
338+ " print(\" ERRORS: \" , task3.errors)\n " ,
339+ " print(\" RESULTS:\" , task3.result)"
340+ ],
341+ "cell_type" : " code" ,
342+ "outputs" : [],
343+ "execution_count" : null
344+ },
345+ {
346+ "metadata" : {},
347+ "source" : [
348+ " # You can mix local files with urls when creating data rows\n " ,
349+ " task4 = dataset.upsert_data_rows([{\n " ,
305350 " \" row_data\" : \" https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0009.jpeg\" ,\n " ,
306- " \" global_key\" : str(uuid.uuid4()),\n " ,
307- " \" attachments\" : [\n " ,
308- " {\n " ,
309- " \" type\" : \" IMAGE_OVERLAY\" ,\n " ,
310- " \" value\" : \" https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\"\n " ,
311- " },\n " ,
312- " {\n " ,
313- " \" type\" : \" RAW_TEXT\" ,\n " ,
314- " \" value\" : \" IOWA, Zone 2232, June 2022 [Text string]\"\n " ,
315- " },\n " ,
316- " {\n " ,
317- " \" type\" : \" TEXT_URL\" ,\n " ,
318- " \" value\" : \" https://storage.googleapis.com/labelbox-sample-datasets/Docs/text_attachment.txt\"\n " ,
319- " },\n " ,
320- " {\n " ,
321- " \" type\" : \" IMAGE\" ,\n " ,
322- " \" value\" : \" https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\"\n " ,
323- " },\n " ,
324- " {\n " ,
325- " \" type\" : \" VIDEO\" ,\n " ,
326- " \" value\" : \" https://storage.googleapis.com/labelbox-sample-datasets/Docs/drone_video.mp4\"\n " ,
327- " },\n " ,
328- " {\n " ,
329- " \" type\" : \" HTML\" ,\n " ,
330- " \" value\" : \" https://storage.googleapis.com/labelbox-sample-datasets/Docs/windy.html\"\n " ,
331- " },\n " ,
332- " {\n " ,
333- " \" type\" : \" PDF_URL\" ,\n " ,
334- " \" value\" : \" https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n " ,
335- " }\n " ,
336- " ]\n " ,
351+ " \" global_key\" : str(uuid.uuid4())\n " ,
352+ " }, {\n " ,
353+ " \" row_data\" : local_data_path,\n " ,
354+ " \" global_key\" : str(uuid.uuid4())\n " ,
337355 " }])\n " ,
338- " print(\" ERRORS: \" , task2.errors)\n " ,
339- " print(\" RESULTS:\" , task2.result)"
356+ " task4.wait_till_done()\n " ,
357+ " print(\" ERRORS: \" , task4.errors)\n " ,
358+ " print(\" RESULTS:\" , task4.result)"
340359 ],
341360 "cell_type" : " code" ,
342361 "outputs" : [],
345364 {
346365 "metadata" : {},
347366 "source" : [
348- " ### Create data rows using data in your local path "
367+ " ### Create data rows via `dataset.create_data_rows()` \n "
349368 ],
350369 "cell_type" : " markdown"
351370 },
352371 {
353372 "metadata" : {},
354373 "source" : [
355- " # Local paths\n " ,
356- " local_data_path = \" /tmp/test_data_row.txt\"\n " ,
357- " with open(local_data_path, 'w') as file:\n " ,
358- " file.write(\" sample data\" )\n " ,
359- " \n " ,
360- " task3 = dataset.create_data_rows([local_data_path])\n " ,
361- " print(\" ERRORS: \" , task3.errors)\n " ,
362- " print(\" RESULTS:\" , task3.result)"
374+ " dataset_2 = client.create_dataset(name=\" data_rows_demo_dataset_3\" )"
363375 ],
364376 "cell_type" : " code" ,
365377 "outputs" : [],
368380 {
369381 "metadata" : {},
370382 "source" : [
371- " # You can mix local files with urls when creating data rows\n " ,
372- " task4 = dataset.create_data_rows([{\n " ,
373- " \" row_data\" : \" https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0003.jpeg\" ,\n " ,
374- " \" global_key\" : str(uuid.uuid4())\n " ,
375- " }, {\n " ,
376- " \" row_data\" : local_data_path,\n " ,
377- " \" global_key\" : str(uuid.uuid4())\n " ,
378- " }])\n " ,
379- " print(\" ERRORS: \" , task4.errors)\n " ,
380- " print(\" RESULTS:\" , task4.result)"
383+ " uploads = []\n " ,
384+ " # Generate data rows\n " ,
385+ " for i in range(1,9):\n " ,
386+ " uploads.append({\n " ,
387+ " \" row_data\" : f\" https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\" ,\n " ,
388+ " \" global_key\" : \" TEST-ID-%id\" % uuid.uuid1(),\n " ,
389+ " ## add metadata (optional)\n " ,
390+ " \" metadata_fields\" : [\n " ,
391+ " lb.DataRowMetadataField(\n " ,
392+ " schema_id=mdo.reserved_by_name[\" tag\" ].uid, # specify the schema id\n " ,
393+ " value=\" tag_string\" , # typed inputs\n " ,
394+ " ),\n " ,
395+ " ]\n " ,
396+ " })\n " ,
397+ " \n " ,
398+ " task1_2 = dataset_2.create_data_rows(uploads)\n " ,
399+ " task1_2.wait_till_done()\n " ,
400+ " print(\" ERRORS: \" , task1_2.errors)\n " ,
401+ " print(\" RESULTS:\" , task1_2.result)"
381402 ],
382403 "cell_type" : " code" ,
383404 "outputs" : [],
387408 "metadata" : {},
388409 "source" : [
389410 " ### Update\n " ,
390- " Only two fields can be updated after a data row is created \n " ,
391- " 1. Global keys \n " ,
392- " 2. Row data\n "
411+ " `dataset.upsert_data_rows()` can also be used to update data rows \n " ,
412+ " \n " ,
413+ " To update data rows using this method, you need to pass a `key`, which can reference either a global key or a data row ID. Additionally, include any fields that you wish to update along with their new values. \n "
393414 ],
394415 "cell_type" : " markdown"
395416 },
396417 {
397418 "metadata" : {},
398419 "source" : [
399- " data_row = client.get_data_row(\" <data_row_id_to_update>\" )\n " ,
400- " new_id = str(uuid.uuid4())\n " ,
401- " data_row.update(global_key=new_id, row_data=\" https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0005.jpeg\" )\n " ,
402- " print(data_row)"
420+ " # Fetch a data row from the first dataset example\n " ,
421+ " ts = dataset.export()\n " ,
422+ " ts.wait_till_done()\n " ,
423+ " DATA_ROW_ID = [json.loads(output.json_str) for output in ts.get_stream()][0]['data_row']['id']\n " ,
424+ " GLOBAL_KEY = [json.loads(output.json_str) for output in ts.get_stream()][0]['data_row']['global_key']\n " ,
425+ " \n " ,
426+ " print(f\" Pick either a data row id : {DATA_ROW_ID} or global key: {GLOBAL_KEY}\" )\n "
427+ ],
428+ "cell_type" : " code" ,
429+ "outputs" : [],
430+ "execution_count" : null
431+ },
432+ {
433+ "metadata" : {},
434+ "source" : [
435+ " # Update the global key associated with the DATA_ROW_ID or GLOBAL_KEY, and include additional metadata\n " ,
436+ " data = {\n " ,
437+ " \" key\" : lb.UniqueId(DATA_ROW_ID),\n " ,
438+ " \" global_key\" : \" NEW-ID-%id\" % uuid.uuid1(),\n " ,
439+ " \" metadata_fields\" : [\n " ,
440+ " # New metadata\n " ,
441+ " lb.DataRowMetadataField(\n " ,
442+ " schema_id=mdo.reserved_by_name['captureDateTime'].uid,\n " ,
443+ " value=\" 2000-01-01 00:00:00\"\n " ,
444+ " ),\n " ,
445+ " # Include original metadata otherwise it will be removed\n " ,
446+ " lb.DataRowMetadataField(\n " ,
447+ " schema_id=mdo.reserved_by_name[\" tag\" ].uid,\n " ,
448+ " value=\" tag_string\" ,\n " ,
449+ " ),\n " ,
450+ " ]\n " ,
451+ " }\n " ,
452+ " \n " ,
453+ " task5 = dataset_2.upsert_data_rows([data])\n " ,
454+ " task5.wait_till_done()\n " ,
455+ " print(\" ERRORS: \" , task5.errors)\n " ,
456+ " print(\" RESULTS:\" , task5.result)"
403457 ],
404458 "cell_type" : " code" ,
405459 "outputs" : [],
408462 {
409463 "metadata" : {},
410464 "source" : [
411- " ### Create a single attachemt on an existing data row"
465+ " ### Create a single attachment on an existing data row"
412466 ],
413467 "cell_type" : " markdown"
414468 },
415469 {
416470 "metadata" : {},
417471 "source" : [
418472 " # You can only create one attachment at a time.\n " ,
419- " data_row.create_attachment(attachment_type=\" RAW_TEXT\" ,\n " ,
420- " attachment_value=\" LABELERS WILL SEE THIS \" )"
473+ " DATA_ROW_ID = \" <DATA-ROW-ID>\"\n " ,
474+ " data_row = client.get_data_row(DATA_ROW_ID)\n " ,
475+ " attachment = data_row.create_attachment(attachment_type=\" RAW_TEXT\" ,\n " ,
476+ " attachment_value=\" LABELERS WILL SEE THIS\" )"
477+ ],
478+ "cell_type" : " code" ,
479+ "outputs" : [],
480+ "execution_count" : null
481+ },
482+ {
483+ "metadata" : {},
484+ "source" : [
485+ " Update a recently created attachment "
486+ ],
487+ "cell_type" : " markdown"
488+ },
489+ {
490+ "metadata" : {},
491+ "source" : [
492+ " attachment.update(type= \" RAW_TEXT\" , value=\" NEW RAW TEXT\" )"
421493 ],
422494 "cell_type" : " code" ,
423495 "outputs" : [],
440512 {
441513 "metadata" : {},
442514 "source" : [
443- " data_row = client.get_data_row(\" <data_row_id_to_delete>\" )\n " ,
515+ " DATAROW_ID_TO_DELETE = \" <DATA-ROW-ID>\"\n " ,
516+ " data_row = client.get_data_row(DATAROW_ID_TO_DELETE)\n " ,
444517 " data_row.delete()"
445518 ],
446519 "cell_type" : " code" ,
465538 "execution_count" : null
466539 }
467540 ]
468- }
541+ }
0 commit comments