@@ -75,27 +75,24 @@ def create_data_row(self, **kwargs):
7575 return self .client ._create (DataRow , kwargs )
7676
7777 def create_data_rows (self , items ):
78-
79- ## NOTE TODOS
80- """
81- Add attachments (works with all types)
82- Add external ids to bulk imports
83- improved error handling (why job was accepted or not)
84- """
8578 """ Creates multiple DataRow objects based on the given `items`.
8679
8780 Each element in `items` can be either a `str` or a `dict`. If
8881 it is a `str`, then it is interpreted as a local file path. The file
8982 is uploaded to Labelbox and a DataRow referencing it is created.
9083
9184 If an item is a `dict`, then it could support one of the two following structures
92- 1. For static imagery, video, and text it should map `DataRow` fields (or their names) to values.
93- At the minimum an `item` passed as a `dict` must contain a `DataRow.row_data` key and value.
85+ 1. For static imagery, video, and text it should map `DataRow` field names to values.
86+ At the minimum an `item` passed as a `dict` must contain a `row_data` key and value.
87+ If the value for `row_data` is a local file path and the path exists,
88+ then the local file will be uploaded to Labelbox.
89+
9490 2. For tiled imagery the dict must match the import structure specified in the link below
9591 https://docs.labelbox.com/data-model/en/index-en#tiled-imagery-import
9692
9793 >>> dataset.create_data_rows([
9894 >>> {DataRow.row_data:"http://my_site.com/photos/img_01.jpg"},
95+ >>> {DataRow.row_data:"/path/to/file1.jpg"},
9996 >>> "path/to/file2.jpg",
10097 >>> {"tileLayerUrl" : "http://", ...}
10198 >>> ])
@@ -123,72 +120,72 @@ def create_data_rows(self, items):
123120 DataRow = Entity .DataRow
124121
125122 def upload_if_necessary (item ):
126- if isinstance (item , str ):
127- item_url = self .client .upload_file (item )
128- item = {DataRow .row_data : item_url , DataRow .external_id : item }
129- elif isinstance (item , dict ):
130- if os .path .exists (item ['row_data' ]):
131- item_url = self .client .upload_file (item ['row_data' ])
132- parts = {
133- DataRow .row_data :
134- item_url ,
135- DataRow .external_id :
136- item .get ('external_id' , item ['row_data' ])
137- }
138- attachments = item .get ('attachments' )
139- if attachments :
140- item = {** parts , ** {'attachments' : attachments }}
141- else :
142- item = parts
123+ row_data = item ['row_data' ]
124+ if os .path .exists (row_data ):
125+ item_url = self .client .upload_file (item ['row_data' ])
126+ item = {
127+ "row_data" : item_url ,
128+ "external_id" : item .get ('external_id' , item ['row_data' ]),
129+ "attachments" : item .get ('attachments' , [])
130+ }
143131 return item
144132
145133 def validate_attachments (item ):
146134 attachments = item .get ('attachments' )
147135 if attachments :
148136 if isinstance (attachments , list ):
149137 for attachment in attachments :
150- for required_key in ['type' , 'value' ]:
151- if required_key not in attachment :
152- raise ValueError (
153- f"Must provide a `{ required_key } ` key for each attachment. Found { attachment } ."
154- )
155- attachment_type = attachment .get ('type' )
156- if attachment_type not in DataRow .supported_attachment_types :
157- raise ValueError (
158- f"meta_type must be one of { DataRow .supported_attachment_types } . Found { attachment_type } "
159- )
138+ Entity .AssetAttachment .validate_attachment_json (
139+ attachment )
160140 else :
161141 raise ValueError (
162142 f"Attachments must be a list. Found { type (attachments )} "
163143 )
164144 return attachments
165145
166- def convert_item (item ):
167- # Don't make any changes to tms data
168- validate_attachments (item )
169- if "tileLayerUrl" in item :
170- return item
171-
172- item = upload_if_necessary (item )
173- # Convert fields to string names.
174- item = {
175- key .name if isinstance (key , Field ) else key : value
176- for key , value in item .items ()
177- }
146+ def format_row (item ):
147+ # Formats user input into a consistent dict structure
148+ if isinstance (item , dict ):
149+ # Convert fields to strings
150+ item = {
151+ key .name if isinstance (key , Field ) else key : value
152+ for key , value in item .items ()
153+ }
154+ elif isinstance (item , str ):
155+ # The main advantage of using a string over a dict is that the user is specifying
156+ # that the file should exist locally.
157+ # That info is lost after this section so we should check for it here.
158+ if not os .path .exists (item ):
159+ raise ValueError (f"Filepath { item } does not exist." )
160+ item = {"row_data" : item , "external_id" : item }
161+ return item
178162
163+ def validate_keys (item ):
179164 if 'row_data' not in item :
180165 raise InvalidQueryError (
181166 "`row_data` missing when creating DataRow." )
182167
183- # TODO: This is technically breaking. but also idt anyone is using the other fields.
184168 invalid_keys = set (item ) - {
185- 'row_data' , 'external_id' , 'attachments'
169+ * { f . name for f in DataRow . fields ()} , 'attachments'
186170 }
187171 if invalid_keys :
188172 raise InvalidAttributeError (DataRow , invalid_keys )
173+ return item
174+
175+ def convert_item (item ):
176+ # Don't make any changes to tms data
177+ if "tileLayerUrl" in item :
178+ validate_attachments (item )
179+ return item
180+ # Convert all payload variations into the same dict format
181+ item = format_row (item )
182+ # Make sure required keys exist (and there are no extra keys)
183+ validate_keys (item )
184+ # Make sure attachments are valid
185+ validate_attachments (item )
186+ # Upload any local file paths
187+ item = upload_if_necessary (item )
189188
190- # Item is valid, convert it to a dict {graphql_field_name: value}
191- # Need to change the name of DataRow.row_data to "data"
192189 return {
193190 "data" if key == "row_data" else utils .camel_case (key ): value
194191 for key , value in item .items ()
@@ -207,7 +204,8 @@ def convert_item(item):
207204 query_str = """mutation AppendRowsToDatasetPyApi($%s: ID!, $%s: String!){
208205 appendRowsToDataset(data:{datasetId: $%s, jsonFileUrl: $%s}
209206 ){ taskId accepted errorMessage } } """ % (dataset_param , url_param ,
210- dataset_param , url_param )
207+ dataset_param , url_param )
208+
211209 res = self .client .execute (query_str , {
212210 dataset_param : self .uid ,
213211 url_param : descriptor_url
0 commit comments