 from io import StringIO
 import requests
 
-from labelbox.exceptions import InvalidQueryError, LabelboxError, ResourceNotFoundError, InvalidAttributeError
+from labelbox.exceptions import InvalidQueryError, LabelboxError, ResourceNotFoundError, ResourceCreationError
 from labelbox.orm.comparison import Comparison
 from labelbox.orm.db_object import DbObject, Updateable, Deletable, experimental
 from labelbox.orm.model import Entity, Field, Relationship
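
A minimal caller-side sketch of the exception swap above (assuming `dataset` is an existing labelbox Dataset handle): server-side creation failures now surface as ResourceCreationError, so callers can catch it directly:

>>> from labelbox.exceptions import ResourceCreationError
>>> try:
...     dataset.create_data_row(row_data="http://my_site.com/photos/img_01.jpg")
... except ResourceCreationError as err:
...     print(f"data row creation failed: {err}")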
@@ -124,7 +124,6 @@ def data_rows(
 
     def create_data_row(self, items=None, **kwargs) -> "DataRow":
         """ Creates a single DataRow belonging to this dataset.
-
         >>> dataset.create_data_row(row_data="http://my_site.com/photos/img_01.jpg")
 
         Args:
@@ -139,82 +138,31 @@ def create_data_row(self, items=None, **kwargs) -> "DataRow":
                 in `kwargs`.
             InvalidAttributeError: in case the DB object type does not contain
                 any of the field names given in `kwargs`.
-
+            ResourceCreationError: If data row creation failed on the server side.
         """
         invalid_argument_error = "Argument to create_data_row() must be either a dictionary, or kwargs containing `row_data` at minimum"
 
-        def convert_field_keys(items):
-            if not isinstance(items, dict):
-                raise InvalidQueryError(invalid_argument_error)
-            return {
-                key.name if isinstance(key, Field) else key: value
-                for key, value in items.items()
-            }
-
         if items is not None and len(kwargs) > 0:
             raise InvalidQueryError(invalid_argument_error)
 
-        DataRow = Entity.DataRow
-        args = convert_field_keys(items) if items is not None else kwargs
-
-        if DataRow.row_data.name not in args:
-            raise InvalidQueryError(
-                "DataRow.row_data missing when creating DataRow.")
-
-        row_data = args[DataRow.row_data.name]
-
-        if isinstance(row_data, str) and row_data.startswith("s3:/"):
-            raise InvalidQueryError(
-                "row_data: s3 assets must start with 'https'.")
-
-        if not isinstance(row_data, str):
-            # If the row data is an object, upload as a string
-            args[DataRow.row_data.name] = json.dumps(row_data)
-        elif os.path.exists(row_data):
-            # If row data is a local file path, upload it to server.
-            args[DataRow.row_data.name] = self.client.upload_file(row_data)
-
-        # Parse metadata fields, if they are provided
-        if DataRow.metadata_fields.name in args:
-            mdo = self.client.get_data_row_metadata_ontology()
-            args[DataRow.metadata_fields.name] = mdo.parse_upsert_metadata(
-                args[DataRow.metadata_fields.name])
-
-        if "embeddings" in args:
-            args["embeddings"] = [
-                EmbeddingVector(**e).to_gql() for e in args["embeddings"]
-            ]
+        args = items if items is not None else kwargs
 
-        query_str = """mutation CreateDataRowPyApi(
-            $row_data: String!,
-            $metadata_fields: [DataRowCustomMetadataUpsertInput!],
-            $attachments: [DataRowAttachmentInput!],
-            $media_type: MediaType,
-            $external_id: String,
-            $global_key: String,
-            $dataset: ID!,
-            $embeddings: [DataRowEmbeddingVectorInput!]
-        ){
-            createDataRow(
-                data:
-                  {
-                    rowData: $row_data
-                    mediaType: $media_type
-                    metadataFields: $metadata_fields
-                    externalId: $external_id
-                    globalKey: $global_key
-                    attachments: $attachments
-                    dataset: {connect: {id: $dataset}}
-                    embeddings: $embeddings
-                  }
-            )
-            {%s}
-        }
-        """ % query.results_query_part(Entity.DataRow)
-        res = self.client.execute(query_str, {**args, 'dataset': self.uid})
-        return DataRow(self.client, res['createDataRow'])
+        file_upload_thread_count = 1
+        completed_task = self._create_data_rows_sync(
+            [args], file_upload_thread_count=file_upload_thread_count)
 
-    def create_data_rows_sync(self, items) -> None:
+        res = completed_task.result
+        if res is None or len(res) == 0:
+            raise ResourceCreationError(
+                f"Data row upload did not complete, task status {completed_task.status} task id {completed_task.uid}"
+            )
+
+        return self.client.get_data_row(res[0]['id'])
+
+    def create_data_rows_sync(
+            self,
+            items,
+            file_upload_thread_count=FILE_UPLOAD_THREAD_COUNT) -> None:
218166 """ Synchronously bulk upload data rows.
219167
220168 Use this instead of `Dataset.create_data_rows` for smaller batches of data rows that need to be uploaded quickly.
@@ -228,32 +176,49 @@ def create_data_rows_sync(self, items) -> None:
             None. If the function doesn't raise an exception then the import was successful.
 
         Raises:
-            InvalidQueryError: If the `items` parameter does not conform to
+            ResourceCreationError: If the `items` parameter does not conform to
                 the specification in Dataset._create_descriptor_file or if the server did not accept the
                 DataRow creation request (unknown reason).
             InvalidAttributeError: If there are fields in `items` not valid for
                 a DataRow.
             ValueError: When the upload parameters are invalid
         """
+        warnings.warn(
+            "This method is deprecated and will be "
+            "removed in a future release. Please use create_data_rows instead.")
+
+        self._create_data_rows_sync(
+            items, file_upload_thread_count=file_upload_thread_count)
+
+        return None  # Return None if no exception is raised
+
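A minimal migration sketch for the deprecation above (assuming `items` is a list of data row dicts such as {"row_data": ...}):

>>> dataset.create_data_rows_sync(items)    # deprecated: warns, returns None on success
>>> task = dataset.create_data_rows(items)  # replacement: returns a DataUpsertTask
>>> task.wait_till_done()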
+    def _create_data_rows_sync(self,
+                               items,
+                               file_upload_thread_count=FILE_UPLOAD_THREAD_COUNT
+                              ) -> "DataUpsertTask":
         max_data_rows_supported = 1000
-        max_attachments_per_data_row = 5
         if len(items) > max_data_rows_supported:
             raise ValueError(
                 f"Dataset.create_data_rows_sync() supports a max of {max_data_rows_supported} data rows."
                 " For larger imports use the async function Dataset.create_data_rows()"
             )
-        descriptor_url = DescriptorFileCreator(self.client).create_one(
-            items, max_attachments_per_data_row=max_attachments_per_data_row)
-        dataset_param = "datasetId"
-        url_param = "jsonUrl"
-        query_str = """mutation AppendRowsToDatasetSyncPyApi($%s: ID!, $%s: String!){
-            appendRowsToDatasetSync(data:{datasetId: $%s, jsonFileUrl: $%s}
-            ){dataset{id}}} """ % (dataset_param, url_param, dataset_param,
-                                   url_param)
-        self.client.execute(query_str, {
-            dataset_param: self.uid,
-            url_param: descriptor_url
-        })
+        if file_upload_thread_count < 1:
+            raise ValueError(
+                "file_upload_thread_count must be a positive integer")
+
+        task: DataUpsertTask = self.create_data_rows(items,
+                                                     file_upload_thread_count)
+        task.wait_till_done()
+
+        if task.has_errors():
+            raise ResourceCreationError(
+                f"Data row upload errors: {task.errors}", cause=task.uid)
+        if task.status != "COMPLETE":
+            raise ResourceCreationError(
+                f"Data row upload did not complete, task status {task.status} task id {task.uid}"
+            )
+
+        return task
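
Both create_data_row and the deprecated sync method now funnel through _create_data_rows_sync, so the DataUpsertTask fields checked above (status, errors, result, uid) are where failures surface. Continuing the migration sketch, the same checks from caller code:

>>> task = dataset.create_data_rows(items)
>>> task.wait_till_done()
>>> if task.has_errors():
...     raise RuntimeError(f"upload errors: {task.errors}")
>>> created = task.result  # per the code above, a list of records with an 'id' key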
 
     def create_data_rows(self,
                          items,
@@ -287,14 +252,18 @@ def create_data_rows(self,
             raise ValueError(
                 "file_upload_thread_count must be a positive integer")
 
+        # Normalize the mixed input list, build upsert specs, then upload.
+        upload_items = self._separate_and_process_items(items)
+        specs = DataRowCreateItem.build(self.uid, upload_items)
+        return self._exec_upsert_data_rows(specs, file_upload_thread_count)
+
+    def _separate_and_process_items(self, items):
         string_items = [item for item in items if isinstance(item, str)]
         dict_items = [item for item in items if isinstance(item, dict)]
         dict_string_items = []
         if len(string_items) > 0:
             dict_string_items = self._build_from_local_paths(string_items)
-        specs = DataRowCreateItem.build(self.uid,
-                                        dict_items + dict_string_items)
-        return self._exec_upsert_data_rows(specs, file_upload_thread_count)
+        return dict_items + dict_string_items
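
Since _separate_and_process_items routes string items through _build_from_local_paths and passes dict specs straight through, one create_data_rows call can mix both forms. A hedged sketch (the local path is hypothetical):

>>> task = dataset.create_data_rows([
...     "/tmp/local_image.jpg",  # hypothetical local file, uploaded first
...     {"row_data": "http://my_site.com/photos/img_01.jpg"},
... ])
>>> task.wait_till_done()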
 
     def _build_from_local_paths(
             self,