|
229 | 229 | { |
230 | 230 | "metadata": {}, |
231 | 231 | "source": [ |
232 | | - "# Export data rows from the dataset\n", |
233 | | - "data_rows = [dr.uid for dr in dataset.export_data_rows()]\n", |
| 232 | + "client.enable_experimental = True\n", |
234 | 233 | "\n", |
235 | | - "# Randomly select 200 Data Rows (or fewer if the dataset has less than 200 data rows)\n", |
236 | | - "sampled_data_rows = random.sample(data_rows, min(len(data_rows), 200))\n", |
| 234 | + "# Minimal ExportV2 parameters focused solely on data row IDs\n", |
| 235 | + "export_params = {\n", |
| 236 | + " \"data_row_details\": True # Request data row details; no other optional export fields are needed\n", |
| 237 | + "}\n", |
237 | 238 | "\n", |
238 | | - "# Create a new batch in the project and add the sampled data rows\n", |
239 | | - "batch = project.create_batch(\n", |
240 | | - " \"Initial batch\", # name of the batch\n", |
241 | | - " sampled_data_rows, # list of Data Rows\n", |
242 | | - " 1 # priority between 1-5\n", |
243 | | - ")\n", |
244 | | - "print(f\"Created batch with ID: {batch.uid}\")" |
245 | | - ], |
246 | | - "cell_type": "code", |
247 | | - "outputs": [], |
248 | | - "execution_count": null |
249 | | - }, |
250 | | - { |
251 | | - "metadata": {}, |
252 | | - "source": [ |
253 | | - "queued_data_rows = project.export_queued_data_rows()\n", |
254 | | - "labels = []\n", |
| 239 | + "# Initiate the streamable export task\n", |
| 240 | + "dataset = client.get_dataset(dataset.uid) # Re-fetch the dataset by ID (substitute a literal dataset ID if `dataset` is not already defined)\n", |
| 241 | + "export_task = dataset.export(params=export_params)\n", |
| 242 | + "export_task.wait_till_done()\n", |
| 243 | + "print(export_task)\n", |
| 244 | + "\n", |
| 245 | + "data_rows = []\n", |
| 246 | + "\n", |
| 247 | + "def json_stream_handler(output: lb.JsonConverterOutput):\n", |
| 248 | + " data_row = json.loads(output.json_str)\n", |
| 249 | + " data_rows.append(data_row)\n", |
255 | 250 | "\n", |
256 | | - "for datarow in queued_data_rows:\n", |
| 251 | + "\n", |
| 252 | + "if export_task.has_errors():\n", |
| 253 | + " export_task.get_stream(\n", |
| 254 | + " \n", |
| 255 | + " converter=lb.JsonConverter(),\n", |
| 256 | + " stream_type=lb.StreamType.ERRORS\n", |
| 257 | + " ).start(stream_handler=lambda error: print(error))\n", |
| 258 | + "\n", |
| 259 | + "if export_task.has_result():\n", |
| 260 | + " export_json = export_task.get_stream(\n", |
| 261 | + " converter=lb.JsonConverter(),\n", |
| 262 | + " stream_type=lb.StreamType.RESULT\n", |
| 263 | + " ).start(stream_handler=json_stream_handler)\n", |
| 264 | + "\n", |
| 265 | + "labels = []\n", |
| 266 | + "for datarow in data_rows:\n", |
257 | 267 | " annotations_list = []\n", |
258 | | - " folder = datarow['externalId'].split(\"/\")[0]\n", |
259 | | - " id = datarow['externalId'].split(\"/\")[1]\n", |
| 268 | + " # Access the 'data_row' dictionary first\n", |
| 269 | + " data_row_dict = datarow['data_row']\n", |
| 270 | + " folder = data_row_dict['external_id'].split(\"/\")[0]\n", |
| 271 | + " id = data_row_dict['external_id'].split(\"/\")[1]\n", |
| 272 | + " \n", |
260 | 273 | " if folder == \"positive_image_set\":\n", |
261 | 274 | " for image in annotations['images']:\n", |
262 | 275 | " if image['file_name'] == id:\n", |
|
271 | 284 | " class_name = category['name']\n", |
272 | 285 | " break\n", |
273 | 286 | " if class_name:\n", |
274 | | - " annotations_list.append(ObjectAnnotation(\n", |
| 287 | + " annotations_list.append(ObjectAnnotation(\n", |
275 | 288 | " name=class_name,\n", |
276 | 289 | " value=Rectangle(start=Point(x=bbox[0], y=bbox[1]), end=Point(x=bbox[2]+bbox[0], y=bbox[3]+bbox[1]))\n", |
277 | 290 | " ))\n", |
278 | | - " image_data = ImageData(uid=datarow['id'])\n", |
279 | | - " labels.append(Label(data=image_data, annotations=annotations_list))\n" |
280 | | - ], |
281 | | - "cell_type": "code", |
282 | | - "outputs": [], |
283 | | - "execution_count": null |
284 | | - }, |
285 | | - { |
286 | | - "metadata": {}, |
287 | | - "source": [ |
288 | | - "print(labels)" |
| 291 | + " image_data = ImageData(uid=data_row_dict['id'])\n", |
| 292 | + " labels.append(Label(data=image_data, annotations=annotations_list))" |
289 | 293 | ], |
290 | 294 | "cell_type": "code", |
291 | 295 | "outputs": [], |
|
0 commit comments