1- from labelbox .orm .db_object import DbObject
2- from labelbox .orm .model import Field , Relationship
1+ from typing import Generator
2+ from labelbox .orm .db_object import DbObject , experimental
3+ from labelbox .orm .model import Entity , Field , Relationship
4+ from labelbox .exceptions import LabelboxError
5+ from io import StringIO
6+ import ndjson
7+ import requests
8+ import logging
9+ import time
10+
11+ logger = logging .getLogger (__name__ )
312
413
514class Batch (DbObject ):
@@ -19,7 +28,50 @@ class Batch(DbObject):
1928 created_at = Field .DateTime ("created_at" )
2029 updated_at = Field .DateTime ("updated_at" )
2130 size = Field .Int ("size" )
31+ archived_at = Field .DateTime ("archived_at" )
2232
2333 # Relationships
2434 project = Relationship .ToOne ("Project" )
2535 created_by = Relationship .ToOne ("User" )
36+
def export_data_rows(self, timeout_seconds=120) -> Generator:
    """Return a generator over all data rows currently in this batch.

    Repeatedly issues the ``exportBatchDataRows`` mutation until the server
    reports the export as ``COMPLETE``, then downloads the NDJSON file at
    the returned URL and wraps every record in a ``DataRow`` entity.

    Note: For efficiency, the data are cached for 30 minutes. Newly created
    data rows will not appear until the end of the cache period.

    Args:
        timeout_seconds (float): Max waiting time, in seconds. Only the
            pauses between polls are counted against this budget.
    Returns:
        Generator that yields DataRow objects belonging to this batch.
    Raises:
        LabelboxError: if the export fails or is not ready within the
            specified time.
        requests.HTTPError: if downloading the finished export returns an
            HTTP error status.
    """
    id_param = "batchId"
    query_str = """mutation GetBatchDataRowsExportUrlPyApi($%s: ID!)
        {exportBatchDataRows(data:{batchId: $%s }) {downloadUrl createdAt status}}
    """ % (id_param, id_param)
    sleep_time = 2
    # Count down on a separate variable so the caller's configured timeout
    # stays available for the error message below.
    remaining_seconds = timeout_seconds

    while True:
        res = self.client.execute(query_str, {id_param: self.uid})
        res = res["exportBatchDataRows"]
        status = res["status"]

        if status == "COMPLETE":
            response = requests.get(res["downloadUrl"])
            response.raise_for_status()
            reader = ndjson.reader(StringIO(response.text))
            # Lazy: DataRow objects are only built as the caller iterates.
            return (Entity.DataRow(self.client, result) for result in reader)

        if status == "FAILED":
            raise LabelboxError("Data row export failed.")

        # Charge the upcoming pause against the budget before sleeping, so
        # we never sleep past the deadline.
        remaining_seconds -= sleep_time
        if remaining_seconds <= 0:
            raise LabelboxError(
                f"Unable to export data rows within {timeout_seconds} seconds."
            )

        logger.debug("Batch '%s' data row export, waiting for server...",
                     self.uid)
        time.sleep(sleep_time)
0 commit comments