|
226 | 226 | "outputs": [], |
227 | 227 | "execution_count": null |
228 | 228 | }, |
| 229 | + { |
| 230 | + "metadata": {}, |
| 231 | + "source": [ |
| 232 | + "# Send a batch of data rows to the project" |
| 233 | + ], |
| 234 | + "cell_type": "markdown" |
| 235 | + }, |
229 | 236 | { |
230 | 237 | "metadata": {}, |
231 | 238 | "source": [ |
|
236 | 243 | " \"data_row_details\": True # Only export data row details \n", |
237 | 244 | "}\n", |
238 | 245 | "\n", |
239 | | - "# Initiate the streamable export task\n", |
| 246 | + "# Initiate the streamable export task from catalog\n", |
240 | 247 | "dataset = client.get_dataset(dataset.uid) # Update with the actual dataset ID\n", |
241 | 248 | "export_task = dataset.export(params=export_params)\n", |
242 | 249 | "export_task.wait_till_done()\n", |
243 | 250 | "print(export_task)\n", |
244 | 251 | "\n", |
245 | 252 | "data_rows = []\n", |
246 | 253 | "\n", |
| 254 | + "# Callback used for JSON Converter to correctly collect data row IDs\n", |
| 255 | + "def json_stream_handler(output: lb.JsonConverterOutput):\n", |
| 256 | + " # Parse the JSON string to access the data\n", |
| 257 | + " data = json.loads(output.json_str)\n", |
| 258 | + "\n", |
| 259 | + " # Correctly extract and append DataRow ID\n", |
| 260 | + " if 'data_row' in data and 'id' in data['data_row']:\n", |
| 261 | + " data_rows.append(data['data_row']['id'])\n", |
| 262 | + "\n", |
| 263 | + "# Process the stream if there are results\n", |
| 264 | + "if export_task.has_result():\n", |
| 265 | + " export_task.get_stream(\n", |
| 266 | + " converter=lb.JsonConverter(),\n", |
| 267 | + " stream_type=lb.StreamType.RESULT\n", |
| 268 | + " ).start(stream_handler=json_stream_handler)\n", |
| 269 | + "\n", |
| 270 | + "# Randomly select 200 Data Rows (or fewer if the dataset has less than 200 data rows)\n", |
| 271 | + "sampled_data_rows = random.sample(data_rows, min(len(data_rows), 200))\n", |
| 272 | + "\n", |
| 273 | + "# Create a new batch in the project and add the sampled data rows\n", |
| 274 | + "batch = project.create_batch(\n", |
| 275 | + " \"Initial batch\", # name of the batch\n", |
| 276 | + " sampled_data_rows, # list of Data Rows\n", |
| 277 | + " 1 # priority between 1-5\n", |
| 278 | + ")\n", |
| 279 | + "print(f\"Created batch with ID: {batch.uid}\")" |
| 280 | + ], |
| 281 | + "cell_type": "code", |
| 282 | + "outputs": [], |
| 283 | + "execution_count": null |
| 284 | + }, |
| 285 | + { |
| 286 | + "metadata": {}, |
| 287 | + "source": [ |
| 288 | + "# Create annotations payload" |
| 289 | + ], |
| 290 | + "cell_type": "markdown" |
| 291 | + }, |
| 292 | + { |
| 293 | + "metadata": {}, |
| 294 | + "source": [ |
| 295 | + "\n", |
| 296 | + "# Set export parameters focused on data row details\n", |
| 297 | + "export_params = {\n", |
| 298 | + " \"data_row_details\": True, # Only export data row details\n", |
| 299 | + " \"batch_ids\": [batch.uid], # Optional: Include batch ids to filter by specific batches\n", |
| 300 | + "}\n", |
| 301 | + "\n", |
| 302 | + "# Initialize the streamable export task from project\n", |
| 303 | + "export_task = project.export(params=export_params)\n", |
| 304 | + "export_task.wait_till_done()\n", |
| 305 | + "\n", |
| 306 | + "data_rows = []\n", |
| 307 | + "\n", |
247 | 308 | "def json_stream_handler(output: lb.JsonConverterOutput):\n", |
248 | 309 | " data_row = json.loads(output.json_str)\n", |
249 | 310 | " data_rows.append(data_row)\n", |
|
284 | 345 | " class_name = category['name']\n", |
285 | 346 | " break\n", |
286 | 347 | " if class_name:\n", |
287 | | - " annotations_list.append(ObjectAnnotation(\n", |
| 348 | + " annotations_list.append(ObjectAnnotation(\n", |
288 | 349 | " name=class_name,\n", |
289 | 350 | " value=Rectangle(start=Point(x=bbox[0], y=bbox[1]), end=Point(x=bbox[2]+bbox[0], y=bbox[3]+bbox[1]))\n", |
290 | 351 | " ))\n", |
|