Skip to content

Commit 6ac4298

Browse files
improved metadata nb (#1107)
Co-authored-by: ezekielemerson <eemerson2325@gmail.com>
1 parent 938c3cc commit 6ac4298

File tree

1 file changed

+27
-113
lines changed

1 file changed

+27
-113
lines changed

examples/basics/data_row_metadata.ipynb

Lines changed: 27 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -39,76 +39,36 @@
3939
{
4040
"metadata": {},
4141
"source": [
42-
"### Installation"
42+
"## Setup"
4343
],
4444
"cell_type": "markdown"
4545
},
4646
{
4747
"metadata": {},
4848
"source": [
49-
"!pip install -q --upgrade tensorflow-hub \\\n",
50-
" scikit-learn \\\n",
51-
" seaborn \\\n",
52-
" \"labelbox[data]\""
49+
"!pip install -q \"labelbox[data]\""
5350
],
5451
"cell_type": "code",
55-
"outputs": [
56-
{
57-
"output_type": "stream",
58-
"name": "stdout",
59-
"text": [
60-
"\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m9.8/9.8 MB\u001b[0m \u001b[31m42.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
61-
"\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m293.3/293.3 KB\u001b[0m \u001b[31m21.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
62-
"\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m189.2/189.2 KB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
63-
"\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m56.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
64-
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
65-
" Building wheel for pygeotile (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
66-
]
67-
}
68-
],
52+
"outputs": [],
6953
"execution_count": null
7054
},
7155
{
7256
"metadata": {},
7357
"source": [
74-
"## Setup"
75-
],
76-
"cell_type": "markdown"
77-
},
78-
{
79-
"metadata": {},
80-
"source": [
81-
"import random\n",
82-
"import numpy as np\n",
83-
"\n",
8458
"import labelbox as lb\n",
85-
"from sklearn.random_projection import GaussianRandomProjection\n",
86-
"import tensorflow as tf\n",
87-
"import seaborn as sns\n",
88-
"import tensorflow_hub as hub\n",
8959
"from datetime import datetime\n",
90-
"from tqdm.notebook import tqdm\n",
91-
"import requests\n",
9260
"from pprint import pprint\n",
9361
"from uuid import uuid4"
9462
],
9563
"cell_type": "code",
9664
"outputs": [],
9765
"execution_count": null
9866
},
99-
{
100-
"metadata": {},
101-
"source": [
102-
"# API Key and Client\n",
103-
"Provide a valid api key below in order to properly connect to the Labelbox Client."
104-
],
105-
"cell_type": "markdown"
106-
},
10767
{
10868
"metadata": {},
10969
"source": [
11070
"# Add your api key\n",
111-
"API_KEY=\"\"\n",
71+
"API_KEY = \"\"\n",
11272
"client = lb.Client(api_key=API_KEY)"
11373
],
11474
"cell_type": "code",
@@ -158,20 +118,7 @@
158118
"pprint(metadata_ontologies, indent=2)"
159119
],
160120
"cell_type": "code",
161-
"outputs": [
162-
{
163-
"output_type": "stream",
164-
"name": "stdout",
165-
"text": [
166-
"{ 'cko8s9r5v0001h2dk9elqdidh': DataRowMetadataSchema(uid='cko8s9r5v0001h2dk9elqdidh', name='tag', reserved=True, kind=<DataRowMetadataKind.string: 'CustomMetadataString'>, options=None, parent=None),\n",
167-
" 'cko8sbczn0002h2dkdaxb5kal': DataRowMetadataSchema(uid='cko8sbczn0002h2dkdaxb5kal', name='split', reserved=True, kind=<DataRowMetadataKind.enum: 'CustomMetadataEnum'>, options=[DataRowMetadataSchema(uid='cko8sbscr0003h2dk04w86hof', name='train', reserved=True, kind=<DataRowMetadataKind.option: 'CustomMetadataEnumOption'>, options=None, parent='cko8sbczn0002h2dkdaxb5kal'), DataRowMetadataSchema(uid='cko8sc2yr0004h2dk69aj5x63', name='valid', reserved=True, kind=<DataRowMetadataKind.option: 'CustomMetadataEnumOption'>, options=None, parent='cko8sbczn0002h2dkdaxb5kal'), DataRowMetadataSchema(uid='cko8scbz70005h2dkastwhgqt', name='test', reserved=True, kind=<DataRowMetadataKind.option: 'CustomMetadataEnumOption'>, options=None, parent='cko8sbczn0002h2dkdaxb5kal')], parent=None),\n",
168-
" 'cko8sbscr0003h2dk04w86hof': DataRowMetadataSchema(uid='cko8sbscr0003h2dk04w86hof', name='train', reserved=True, kind=<DataRowMetadataKind.option: 'CustomMetadataEnumOption'>, options=None, parent='cko8sbczn0002h2dkdaxb5kal'),\n",
169-
" 'cko8sc2yr0004h2dk69aj5x63': DataRowMetadataSchema(uid='cko8sc2yr0004h2dk69aj5x63', name='valid', reserved=True, kind=<DataRowMetadataKind.option: 'CustomMetadataEnumOption'>, options=None, parent='cko8sbczn0002h2dkdaxb5kal'),\n",
170-
" 'cko8scbz70005h2dkastwhgqt': DataRowMetadataSchema(uid='cko8scbz70005h2dkastwhgqt', name='test', reserved=True, kind=<DataRowMetadataKind.option: 'CustomMetadataEnumOption'>, options=None, parent='cko8sbczn0002h2dkdaxb5kal'),\n",
171-
" 'cko8sdzv70006h2dk8jg64zvb': DataRowMetadataSchema(uid='cko8sdzv70006h2dk8jg64zvb', name='captureDateTime', reserved=True, kind=<DataRowMetadataKind.datetime: 'CustomMetadataDateTime'>, options=None, parent=None)}\n"
172-
]
173-
}
174-
],
121+
"outputs": [],
175122
"execution_count": null
176123
},
177124
{
@@ -200,18 +147,7 @@
200147
"tag_field"
201148
],
202149
"cell_type": "code",
203-
"outputs": [
204-
{
205-
"output_type": "execute_result",
206-
"data": {
207-
"text/plain": [
208-
"DataRowMetadataSchema(uid='cko8s9r5v0001h2dk9elqdidh', name='tag', reserved=True, kind=<DataRowMetadataKind.string: 'CustomMetadataString'>, options=None, parent=None)"
209-
]
210-
},
211-
"metadata": {},
212-
"execution_count": 19
213-
}
214-
],
150+
"outputs": [],
215151
"execution_count": null
216152
},
217153
{
@@ -229,7 +165,7 @@
229165
{
230166
"metadata": {},
231167
"source": [
232-
"Option 1: Specify metadata with a list of DataRowMetadataField. This is the recommended option since it comes with validation for metadata fields."
168+
"Option 1: Specify metadata with a list of `DataRowMetadataField` objects. This is the recommended option since it comes with validation for metadata fields."
233169
],
234170
"cell_type": "markdown"
235171
},
@@ -261,7 +197,7 @@
261197
{
262198
"metadata": {},
263199
"source": [
264-
"Option 2: Alternatively, you can specify the metadata fields with dictionary format without declaring the DataRowMetadataField objects.\n"
200+
"Option 2: Alternatively, you can specify the metadata fields in dictionary format without declaring the `DataRowMetadataField` objects.\n"
265201
],
266202
"cell_type": "markdown"
267203
},
@@ -281,7 +217,7 @@
281217
"}\n",
282218
"\n",
283219
"# Construct a dictionary of Enums options metadata\n",
284-
"split_metadta_field_dict = {\n",
220+
"split_metadata_field_dict = {\n",
285221
" \"name\": \"split\",\n",
286222
" \"value\": \"train\",\n",
287223
"}"
@@ -293,23 +229,22 @@
293229
{
294230
"metadata": {},
295231
"source": [
296-
"# Upload Data Rows together with metadata\n",
232+
"## Upload data rows together with metadata\n",
297233
"\n",
298-
"Note: currently, there is a 30k limit on bulk uploading data rows containing metadata.\n",
299-
"\n"
234+
"See [Limits](https://docs.labelbox.com/docs/limits) for information on limits for uploading data rows in one API operation."
300235
],
301236
"cell_type": "markdown"
302237
},
303238
{
304239
"metadata": {},
305240
"source": [
306-
"# A simple example of uploading Data Rows with metadta\n",
241+
"# A simple example of uploading Data Rows with metadata\n",
307242
"dataset = client.create_dataset(name=\"Simple Data Rows import with metadata example\")\n",
308243
"\n",
309-
"data_row = {\"row_data\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/basic.jpg\", \"external_id\": str(uuid4())}\n",
310-
"data_row['metadata_fields'] = [tag_metadata_field, capture_datetime_field, split_metadta_field] \n",
244+
"data_row = {\"row_data\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/basic.jpg\", \"global_key\": str(uuid4())}\n",
245+
"data_row['metadata_fields'] = [tag_metadata_field, capture_datetime_field, split_metadata_field_dict] \n",
311246
"# Also works with a list of dictionaries as specified in Option 2. Uncomment the line below to try. \n",
312-
"# data_row['metadata_fields'] = [tag_metadata_field_dict, capture_datetime_field_dict, split_metadta_field_dict]\n",
247+
"# data_row['metadata_fields'] = [tag_metadata_field_dict, capture_datetime_field_dict, split_metadata_field_dict]\n",
313248
"\n",
314249
"task = dataset.create_data_rows([data_row])\n",
315250
"task.wait_till_done()"
@@ -321,70 +256,49 @@
321256
{
322257
"metadata": {},
323258
"source": [
324-
"## Accessing Metadata\n",
259+
"## Accessing metadata\n",
325260
"\n",
326-
"You can examine individual Data Row, including its metadata."
261+
"You can examine an individual data row, including its metadata."
327262
],
328263
"cell_type": "markdown"
329264
},
330265
{
331266
"metadata": {},
332267
"source": [
333-
"datarow = next(dataset.data_rows())\n",
334-
"for metadata_field in datarow.metadata_fields:\n",
268+
"data_row = next(dataset.data_rows())\n",
269+
"for metadata_field in data_row.metadata_fields:\n",
335270
" print(metadata_field['name'], \":\", metadata_field['value'])"
336271
],
337272
"cell_type": "code",
338-
"outputs": [
339-
{
340-
"output_type": "stream",
341-
"name": "stdout",
342-
"text": [
343-
"tag : tag_string\n",
344-
"split : train\n",
345-
"captureDateTime : 2023-02-28T13:15:25.948052Z\n"
346-
]
347-
}
348-
],
273+
"outputs": [],
349274
"execution_count": null
350275
},
351276
{
352277
"metadata": {},
353278
"source": [
354-
"You can bulk export metadata given Data Row Ids"
279+
"You can bulk export metadata given data row IDs."
355280
],
356281
"cell_type": "markdown"
357282
},
358283
{
359284
"metadata": {},
360285
"source": [
361-
"datarows_metadata = mdo.bulk_export([datarow.uid])\n",
362-
"len(datarows_metadata)"
286+
"data_rows_metadata = mdo.bulk_export([data_row.uid])\n",
287+
"len(data_rows_metadata)"
363288
],
364289
"cell_type": "code",
365-
"outputs": [
366-
{
367-
"output_type": "execute_result",
368-
"data": {
369-
"text/plain": [
370-
"1"
371-
]
372-
},
373-
"metadata": {},
374-
"execution_count": 23
375-
}
376-
],
290+
"outputs": [],
377291
"execution_count": null
378292
},
379293
{
380294
"metadata": {},
381295
"source": [
382-
"# Upload/delete/update custom metadata to existing Data Rows\n",
296+
"## Upload/delete/update custom metadata for existing data rows\n",
383297
"\n",
384-
"For a complete tutorial on how to update, upload and delete custom metadata please follow the steps in this tutorial https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/basics/custom_embeddings.ipynb \n",
298+
"To learn how to update, upload, and delete custom metadata, follow the steps in this [tutorial](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/basics/custom_embeddings.ipynb).\n",
385299
"\n"
386300
],
387301
"cell_type": "markdown"
388302
}
389303
]
390-
}
304+
}

0 commit comments

Comments
 (0)