{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {},
"cells": [
{
"metadata": {},
"source": [
"!pip install -q \"labelbox[data]\""
],
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": [
"import labelbox as lb\n",
"from labelbox.schema.data_row_metadata import DataRowMetadataField, DataRowMetadataKind\n",
"import datetime\n",
"import random\n",
"import os\n",
"import json\n",
"from PIL import Image\n",
"from labelbox.schema.ontology import OntologyBuilder, Tool\n",
"import requests\n",
"from tqdm.notebook import tqdm\n",
"import uuid\n",
"from labelbox.data.annotation_types import Label, ImageData, ObjectAnnotation, Rectangle, Point"
],
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": [
"# Set up the Labelbox client"
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": [
"# Initialize the Labelbox client\n",
"API_KEY = \"\"  # Add your Labelbox API key here\n",
"client = lb.Client(api_key=API_KEY)"
],
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": [
"# Download a public dataset"
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": [
"# Download a file from a URI to a local path, skipping the download if the file already exists\n",
"def download_files(filemap):\n",
"    path, uri = filemap\n",
"    if not os.path.exists(path):\n",
"        response = requests.get(uri, stream=True)\n",
"        response.raise_for_status()  # fail fast on HTTP errors instead of writing an error page to disk\n",
"        with open(path, 'wb') as f:\n",
"            for chunk in response.iter_content(chunk_size=8192):\n",
"                f.write(chunk)\n",
"    return path"
],
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": [
"# Download data rows and annotations\n",
"DATA_ROWS_URL = \"https://storage.googleapis.com/labelbox-datasets/VHR_geospatial/geospatial_datarows.json\"\n",
"ANNOTATIONS_URL = \"https://storage.googleapis.com/labelbox-datasets/VHR_geospatial/geospatial_annotations.json\"\n",
"download_files((\"data_rows.json\", DATA_ROWS_URL))\n",
"download_files((\"annotations.json\", ANNOTATIONS_URL))"
],
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": [
"# Load data rows and annotations\n",
"with open('data_rows.json') as fp:\n",
"    data_rows = json.load(fp)\n",
"with open('annotations.json') as fp:\n",
"    annotations = json.load(fp)"
],
"cell_type": "code",
"outputs": [],
"execution_count": null
},
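{
"metadata": {},
"source": [
"As a quick sanity check, the next cell prints how much was loaded. This is a minimal sketch; it assumes the annotation file follows the COCO-style layout (top-level `images`, `annotations`, and `categories` keys) that the conversion cells further down also rely on."
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": [
"# Inspect the loaded data before uploading anything\n",
"print(f\"Data rows: {len(data_rows)}\")\n",
"print(f\"Images: {len(annotations['images'])}, \"\n",
"      f\"annotations: {len(annotations['annotations'])}, \"\n",
"      f\"categories: {len(annotations['categories'])}\")"
],
"cell_type": "code",
"outputs": [],
"execution_count": null
},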
{
"metadata": {},
"source": [
"# Create a dataset"
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": [
"# Create a new dataset\n",
"dataset = client.create_dataset(name=\"Geospatial vessel detection\")\n",
"print(f\"Created dataset with ID: {dataset.uid}\")"
],
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": [
"# Import Data Rows with Metadata"
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": [
"# Here is an example of adding two metadata fields to your data rows: a \"captureDateTime\" field with a datetime value, and a \"tag\" field with a string value\n",
"metadata_ontology = client.get_data_row_metadata_ontology()\n",
"datetime_schema_id = metadata_ontology.reserved_by_name[\"captureDateTime\"].uid\n",
"tag_schema_id = metadata_ontology.reserved_by_name[\"tag\"].uid\n",
"tag_items = [\"WorldView-1\", \"WorldView-2\", \"WorldView-3\", \"WorldView-4\"]\n",
"\n",
"for datarow in tqdm(data_rows):\n",
"    dt = datetime.datetime.utcnow() + datetime.timedelta(days=random.random() * 30)  # a random datetime within the next 30 days\n",
"    tag_item = random.choice(tag_items)  # a random tag value\n",
"\n",
"    # Option 1: Specify metadata with a list of DataRowMetadataField objects. This is the recommended option because it validates the metadata fields.\n",
"    metadata_fields = [\n",
"        DataRowMetadataField(schema_id=datetime_schema_id, value=dt),\n",
"        DataRowMetadataField(schema_id=tag_schema_id, value=tag_item)\n",
"    ]\n",
"\n",
"    # Option 2 (uncomment to try): specify the metadata fields as plain dictionaries instead of DataRowMetadataField objects. It is equivalent to Option 1, minus the validation.\n",
"    # metadata_fields = [\n",
"    #     {\"schema_id\": datetime_schema_id, \"value\": dt},\n",
"    #     {\"schema_id\": tag_schema_id, \"value\": tag_item}\n",
"    # ]\n",
"\n",
"    datarow[\"metadata_fields\"] = metadata_fields"
],
"cell_type": "code",
"outputs": [],
"execution_count": null
},
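{
"metadata": {},
"source": [
"The two fields above are reserved metadata fields. If you want to see what else is available, the sketch below prints every reserved field name via the same `reserved_by_name` mapping used above."
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": [
"# List the reserved metadata fields available in this workspace\n",
"for name in sorted(metadata_ontology.reserved_by_name):\n",
"    print(name)"
],
"cell_type": "code",
"outputs": [],
"execution_count": null
},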
{
"metadata": {},
"source": [
"task = dataset.create_data_rows(data_rows)\n",
"task.wait_till_done()\n",
"print(f\"Failed data rows: {task.failed_data_rows}\")\n",
"print(f\"Errors: {task.errors}\")\n",
"\n",
"if task.errors:\n",
"    for error in task.errors:\n",
"        if 'Duplicate global key' in error['message'] and dataset.row_count == 0:\n",
"            # If the global key already exists in the workspace, the dataset is created empty, so we can delete it.\n",
"            print(f\"Deleting empty dataset: {dataset}\")\n",
"            dataset.delete()"
],
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": [
"# Examine a Data Row"
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": [
"datarow = next(dataset.data_rows())\n",
"print(datarow)"
],
"cell_type": "code",
"outputs": [],
"execution_count": null
},
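{
"metadata": {},
"source": [
"Beyond printing the whole object, individual attributes can be read directly. A minimal sketch; it assumes the standard `DataRow` attributes `uid`, `external_id`, and `row_data`."
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": [
"# Inspect individual attributes of the data row\n",
"print(datarow.uid)          # Labelbox-generated ID\n",
"print(datarow.external_id)  # e.g. \"positive_image_set/<file_name>\"\n",
"print(datarow.row_data)     # URL of the underlying image"
],
"cell_type": "code",
"outputs": [],
"execution_count": null
},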
{
"metadata": {},
"source": [
"# Set up a labeling project"
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": [
"# Initialize the OntologyBuilder\n",
"ontology_builder = OntologyBuilder()\n",
"\n",
"# Add a bounding-box tool for every category in the source annotations\n",
"for category in annotations['categories']:\n",
"    print(category['name'])\n",
"    ontology_builder.add_tool(Tool(tool=Tool.Type.BBOX, name=category['name']))\n",
"\n",
"# Create the ontology in Labelbox\n",
"ontology = client.create_ontology(\"Vessel Detection Ontology\",\n",
"                                  ontology_builder.asdict(),\n",
"                                  media_type=lb.MediaType.Image)\n",
"print(f\"Created ontology with ID: {ontology.uid}\")\n",
"\n",
"# Create a project and attach the ontology\n",
"project = client.create_project(name=\"Vessel Detection\", media_type=lb.MediaType.Image)\n",
"project.setup_editor(ontology=ontology)\n",
"print(f\"Created project with ID: {project.uid}\")"
],
"cell_type": "code",
"outputs": [],
"execution_count": null
},
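{
"metadata": {},
"source": [
"To confirm what was created, the sketch below lists the tools in the new ontology. It assumes the `normalized` JSON field on the returned ontology object, which mirrors the dictionary passed to `create_ontology`."
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": [
"# Confirm which tools ended up in the created ontology\n",
"for tool in ontology.normalized['tools']:\n",
"    print(tool['name'], '-', tool['tool'])"
],
"cell_type": "code",
"outputs": [],
"execution_count": null
},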
{
"metadata": {},
"source": [
"# Export data row IDs from the dataset\n",
"data_rows = [dr.uid for dr in dataset.export_data_rows()]\n",
"\n",
"# Randomly select 200 data rows (or all of them if the dataset has fewer than 200)\n",
"sampled_data_rows = random.sample(data_rows, min(len(data_rows), 200))\n",
"\n",
"# Create a new batch in the project and add the sampled data rows\n",
"batch = project.create_batch(\n",
"    \"Initial batch\",    # name of the batch\n",
"    sampled_data_rows,  # list of data row IDs\n",
"    1                   # priority between 1-5\n",
")\n",
"print(f\"Created batch with ID: {batch.uid}\")"
],
"cell_type": "code",
"outputs": [],
"execution_count": null
},
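{
"metadata": {},
"source": [
"The conversion cell below handles two bounding-box conventions: COCO stores a box as `[x, y, width, height]`, while Labelbox's `Rectangle` takes a top-left `start` point and a bottom-right `end` point. Here is that mapping in isolation, with made-up numbers:"
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": [
"# COCO bbox [x, y, width, height] -> Labelbox Rectangle (hypothetical example values)\n",
"coco_bbox = [10, 20, 30, 40]\n",
"rect = Rectangle(\n",
"    start=Point(x=coco_bbox[0], y=coco_bbox[1]),                             # top-left corner\n",
"    end=Point(x=coco_bbox[0] + coco_bbox[2], y=coco_bbox[1] + coco_bbox[3])  # bottom-right corner\n",
")\n",
"print(rect)"
],
"cell_type": "code",
"outputs": [],
"execution_count": null
},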
{
"metadata": {},
"source": [
"queued_data_rows = project.export_queued_data_rows()\n",
"labels = []\n",
"ontology_dict = ontology_builder.asdict()  # tool names defined above; avoids shadowing the `ontology` object\n",
"\n",
"for datarow in queued_data_rows:\n",
"    annotations_list = []\n",
"    folder = datarow['externalId'].split(\"/\")[0]\n",
"    file_name = datarow['externalId'].split(\"/\")[1]\n",
"    if folder == \"positive_image_set\":\n",
"        for image in annotations['images']:\n",
"            if image['file_name'] == file_name:\n",
"                for annotation in annotations['annotations']:\n",
"                    if annotation['image_id'] == image['id']:\n",
"                        bbox = annotation['bbox']  # COCO format: [x, y, width, height]\n",
"                        category_id = annotation['category_id'] - 1\n",
"                        class_name = None\n",
"                        for category in ontology_dict['tools']:\n",
"                            if category['name'] == annotations['categories'][category_id]['name']:\n",
"                                class_name = category['name']\n",
"                                break\n",
"                        if class_name:\n",
"                            annotations_list.append(ObjectAnnotation(\n",
"                                name=class_name,\n",
"                                value=Rectangle(start=Point(x=bbox[0], y=bbox[1]), end=Point(x=bbox[0] + bbox[2], y=bbox[1] + bbox[3]))\n",
"                            ))\n",
"    # Negative images get a label with an empty annotation list\n",
"    image_data = ImageData(uid=datarow['id'])\n",
"    labels.append(Label(data=image_data, annotations=annotations_list))"
],
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": [
"print(labels)"
],
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": [
"upload_job = lb.LabelImport.create_from_objects(\n",
"    client=client,\n",
"    project_id=project.uid,\n",
"    name=f\"label_import_job_{str(uuid.uuid4())}\",\n",
"    labels=labels\n",
")\n",
"\n",
"# Wait for the upload to finish and print the results\n",
"upload_job.wait_until_done()\n",
"\n",
"print(f\"Errors: {upload_job.errors}\")\n",
"print(f\"Status of uploads: {upload_job.statuses}\")"
],
"cell_type": "code",
"outputs": [],
"execution_count": null
}
]
}