@@ -137,20 +137,17 @@ class LBV1Label(BaseModel):
137137 label_url : Optional [str ] = Extra ('View Label' )
138138 has_open_issues : Optional [float ] = Extra ('Has Open Issues' )
139139 skipped : Optional [bool ] = Extra ('Skipped' )
140+ media_type : Optional [str ] = Extra ('media_type' )
140141
141142 def to_common (self ) -> Label :
142143 if isinstance (self .label , list ):
143144 annotations = []
144145 for lbl in self .label :
145146 annotations .extend (lbl .to_common ())
146- data = VideoData (url = self .row_data ,
147- external_id = self .external_id ,
148- uid = self .data_row_id )
149147 else :
150148 annotations = self .label .to_common ()
151- data = self ._infer_media_type ()
152149
153- return Label (data = data ,
150+ return Label (data = self . _data_row_to_common () ,
154151 uid = self .id ,
155152 annotations = annotations ,
156153 extra = {
@@ -174,44 +171,49 @@ def from_common(cls, label: Label):
174171 external_id = label .data .external_id ,
175172 ** label .extra )
176173
177- def _infer_media_type (self ):
178- # Video annotations are formatted differently from text and images
179- # So we only need to differentiate those two
def _data_row_to_common(self) -> Union[ImageData, TextData, VideoData]:
    """Build the common data object that describes this label's data row.

    The row content is passed under the ``url`` keyword when
    ``self._is_url()`` is true and under ``text`` otherwise, together with
    the external id and the data row id.

    The media type is taken from ``self.media_type`` when it is set;
    otherwise it is obtained from ``self._infer_media_type()``. Either way
    the resolved value is written back to ``self.media_type``.

    Returns:
        A ``TextData``, ``ImageData`` or ``VideoData`` instance, selected by
        the resolved media type.

    Raises:
        ValueError: If the resolved media type is not one of ``'text'``,
            ``'image'`` or ``'video'``.
    """
    content_key = 'url' if self._is_url() else 'text'
    row_kwargs = {
        content_key: self.row_data,
        'external_id': self.external_id,
        'uid': self.data_row_id,
    }

    # An explicitly provided media type wins; inference is only a fallback.
    resolved_type = self.media_type or self._infer_media_type()
    self.media_type = resolved_type

    data_classes = {'text': TextData, 'image': ImageData, 'video': VideoData}
    data_cls = data_classes.get(self.media_type)
    if data_cls is None:
        raise ValueError(
            f"Annotation types are only supported for {list(data_classes)} media types."
            f" Found {self.media_type}.")
    return data_cls(**row_kwargs)
193+
194+ def _infer_media_type (self ) -> str :
195+ # Determines the data row type based on the label content
196+ if isinstance (self .label , list ):
197+ return 'video'
185198 if self ._has_text_annotations ():
186- # If it has text annotations then it must be text
187- if self ._is_url ():
188- return TextData (url = self .row_data , ** data_row_info )
189- else :
190- return TextData (text = self .row_data , ** data_row_info )
199+ return 'text'
191200 elif self ._has_object_annotations ():
192- # If it has object annotations and none are text annotations then it must be an image
193- if self ._is_url ():
194- return ImageData (url = self .row_data , ** data_row_info )
195- else :
196- return ImageData (text = self .row_data , ** data_row_info )
201+ return 'image'
197202 else :
198- # no annotations to infer data type from.
199- # Use information from the row_data format if possible.
200203 if self ._row_contains ((".jpg" , ".png" , ".jpeg" )) and self ._is_url ():
201- return ImageData (url = self .row_data , ** data_row_info )
202- elif self ._row_contains (
203- (".txt" , ".text" , ".html" )) and self ._is_url ():
204- return TextData (url = self .row_data , ** data_row_info )
205- elif not self ._is_url ():
206- return TextData (text = self .row_data , ** data_row_info )
204+ return 'image'
205+ elif (self ._row_contains ((".txt" , ".text" , ".html" )) and
206+ self ._is_url ()) or not self ._is_url ():
207+ return 'text'
207208 else :
208- # This is going to be urls that do not contain any file extensions
209- # This will only occur on skipped images.
210- # To use this converter on data with this url format
211- # filter out empty examples from the payload before deserializing.
209+ # This condition will occur when a data row url does not contain a file extension
210+ # and the label does not contain object annotations that indicate the media type.
211+ # As a temporary workaround you can explicitly set the media_type
212+ # in each label json payload before converting.
213+ # We will eventually provide the media type in the export.
212214 raise TypeError (
213- "Can't infer data type from row data. Remove empty examples before trying again. "
214- f"row_data: { self . row_data [: 200 ] } " )
215+ "Can't infer data type from row data. row_data: {self.row_data[:200]} "
216+ )
215217
216218 def _has_object_annotations (self ):
217219 return len (self .label .objects ) > 0
0 commit comments