55
66from tqdm import tqdm
77
8- from labelbox .schema . ontology import OntologyBuilder
8+ from labelbox .schema import ontology
99from labelbox .orm .model import Entity
10+ from ..ontology import get_classifications , get_tools
1011from ..generator import PrefetchGenerator
1112from .label import Label
1213
1314logger = logging .getLogger (__name__ )
1415
1516
16- class LabelCollection :
17+ class LabelList :
1718 """
1819 A container for interacting with a collection of labels.
1920 Less memory efficient than LabelGenerator but more performant and convenient to use.
@@ -25,15 +26,15 @@ def __init__(self, data: Iterable[Label]):
2526 self ._index = 0
2627
2728 def assign_schema_ids (
28- self , ontology_builder : OntologyBuilder ) -> "LabelCollection " :
29+ self , ontology_builder : "ontology. OntologyBuilder" ) -> "LabelList " :
2930 """
3031 Adds schema ids to all FeatureSchema objects in the Labels.
3132 This is necessary for MAL.
3233
3334 Args:
34- ontology_builder: The ontology that matches the feature names assigned to objects in this LabelCollection
35+ ontology_builder: The ontology that matches the feature names assigned to objects in this LabelList
3536 Returns:
36- LabelCollection . useful for chaining these modifying functions
37+ LabelList . useful for chaining these modifying functions
3738 """
3839 for label in self ._data :
3940 label .assign_schema_ids (ontology_builder )
@@ -42,7 +43,7 @@ def assign_schema_ids(
4243 def add_to_dataset (self ,
4344 dataset : "Entity.Dataset" ,
4445 signer : Callable [[bytes ], str ],
45- max_concurrency = 20 ) -> "LabelCollection " :
46+ max_concurrency = 20 ) -> "LabelList " :
4647 """
4748 Creates data rows from each labels data object and attaches the data to the given dataset.
4849 Updates the label's data object to have the same external_id and uid as the data row.
@@ -55,15 +56,15 @@ def add_to_dataset(self,
5556 dataset: labelbox dataset object to add the new data row to
5657 signer: A function that accepts bytes and returns a signed url.
5758 Returns:
58- LabelCollection with updated references to new data rows
59+ LabelList with updated references to new data rows
5960 """
6061 self ._ensure_unique_external_ids ()
6162 self .add_url_to_data (signer , max_concurrency = max_concurrency )
6263 upload_task = dataset .create_data_rows ([{
63- Entity . DataRow . row_data : label .data .url ,
64- Entity . DataRow . external_id : label .data .external_id
64+ ' row_data' : label .data .url ,
65+ ' external_id' : label .data .external_id
6566 } for label in self ._data ])
66- upload_task .wait_til_done ()
67+ upload_task .wait_till_done ()
6768
6869 data_row_lookup = {
6970 data_row .external_id : data_row .uid
@@ -73,9 +74,9 @@ def add_to_dataset(self,
7374 label .data .uid = data_row_lookup [label .data .external_id ]
7475 return self
7576
76- def add_url_to_masks (self , signer , max_concurrency = 20 ) -> "LabelCollection " :
77+ def add_url_to_masks (self , signer , max_concurrency = 20 ) -> "LabelList " :
7778 """
78- Creates signed urls for all masks in the LabelCollection .
79+ Creates signed urls for all masks in the LabelList .
7980 Multiple masks can reference the same RasterData mask so this makes sure we only upload that url once.
8081 Only uploads url if one doesn't already exist.
8182
@@ -84,15 +85,15 @@ def add_url_to_masks(self, signer, max_concurrency=20) -> "LabelCollection":
8485 max_concurrency: how many threads to use for uploading.
8586 Should be balanced to match the signing services capabilities.
8687 Returns:
87- LabelCollection with updated references to the new mask urls
88+ LabelList with updated references to the new mask urls
8889 """
8990 for row in self ._apply_threaded (
9091 [label .add_url_to_masks for label in self ._data ], max_concurrency ,
9192 signer ):
9293 ...
9394 return self
9495
95- def add_url_to_data (self , signer , max_concurrency = 20 ) -> "LabelCollection " :
96+ def add_url_to_data (self , signer , max_concurrency = 20 ) -> "LabelList " :
9697 """
9798 Creates signed urls for the data
9899 Only uploads url if one doesn't already exist.
@@ -102,32 +103,46 @@ def add_url_to_data(self, signer, max_concurrency=20) -> "LabelCollection":
102103 max_concurrency: how many threads to use for uploading.
103104 Should be balanced to match the signing services capabilities.
104105 Returns:
105- LabelCollection with updated references to the new data urls
106+ LabelList with updated references to the new data urls
106107 """
107108 for row in self ._apply_threaded (
108109 [label .add_url_to_data for label in self ._data ], max_concurrency ,
109110 signer ):
110111 ...
111112 return self
112113
114+ def get_ontology (self ) -> ontology .OntologyBuilder :
115+ classifications = []
116+ tools = []
117+ for label in self ._data :
118+ tools = get_tools (label .object_annotations (), tools )
119+ classifications = get_classifications (
120+ label .classification_annotations (), classifications )
121+ return ontology .OntologyBuilder (tools = tools ,
122+ classifications = classifications )
123+
113124 def _ensure_unique_external_ids (self ) -> None :
114125 external_ids = set ()
115126 for label in self ._data :
116127 if label .data .external_id is None :
117- label .data .external_id = uuid4 ()
128+ label .data .external_id = str ( uuid4 () )
118129 else :
119130 if label .data .external_id in external_ids :
120131 raise ValueError (
121132 f"External ids must be unique for bulk uploading. Found { label .data .external_id } more than once."
122133 )
123134 external_ids .add (label .data .external_id )
124135
125- def __iter__ (self ) -> "LabelCollection" :
136+ def append (self , label : Label ):
137+ self ._data .append (label )
138+
139+ def __iter__ (self ) -> "LabelList" :
126140 self ._index = 0
127141 return self
128142
129143 def __next__ (self ) -> Label :
130144 if self ._index == len (self ._data ):
145+ self ._index = 0
131146 raise StopIteration
132147
133148 value = self ._data [self ._index ]
@@ -154,18 +169,19 @@ class LabelGenerator(PrefetchGenerator):
154169 A container for interacting with a collection of labels.
155170
156171 Use this class if you have larger data. It is slightly harder to work with
157- than the LabelCollection but will be much more memory efficient.
172+ than the LabelList but will be much more memory efficient.
158173 """
159174
160175 def __init__ (self , data : Generator [Label , None , None ], * args , ** kwargs ):
161176 self ._fns = {}
162177 super ().__init__ (data , * args , ** kwargs )
163178
164- def as_collection (self ) -> "LabelCollection " :
165- return LabelCollection (data = list (self ))
179+ def as_list (self ) -> "LabelList " :
180+ return LabelList (data = list (self ))
166181
167182 def assign_schema_ids (
168- self , ontology_builder : OntologyBuilder ) -> "LabelGenerator" :
183+ self ,
184+ ontology_builder : "ontology.OntologyBuilder" ) -> "LabelGenerator" :
169185
170186 def _assign_ids (label : Label ):
171187 label .assign_schema_ids (ontology_builder )
@@ -190,7 +206,7 @@ def _add_url_to_data(label: Label):
190206 label .add_url_to_data (signer )
191207 return label
192208
193- self ._fns ['_add_url_to_data ' ] = _add_url_to_data
209+ self ._fns ['add_url_to_data ' ] = _add_url_to_data
194210 return self
195211
196212 def add_to_dataset (self , dataset : "Entity.Dataset" ,
@@ -199,7 +215,7 @@ def add_to_dataset(self, dataset: "Entity.Dataset",
199215 Creates data rows from each labels data object and attaches the data to the given dataset.
200216 Updates the label's data object to have the same external_id and uid as the data row.
201217
202- This is a lot slower than LabelCollection .add_to_dataset but also more memory efficient.
218+ This is a lot slower than LabelList .add_to_dataset but also more memory efficient.
203219
204220 Args:
205221 dataset: labelbox dataset object to add the new data row to
@@ -237,6 +253,20 @@ def _add_url_to_masks(label: Label):
237253 self ._fns ['add_url_to_masks' ] = _add_url_to_masks
238254 return self
239255
256+ def register_background_fn (self , fn : Callable [[Label ], Label ],
257+ name : str ) -> "LabelGenerator" :
258+ """
259+ Allows users to add arbitrary io functions to the generator.
260+ These functions will be executed in parallel and added to a prefetch queue.
261+
262+ Args:
263+ fn: Callable that modifies a label and then returns the same label
264+ - For performance reasons, this function shouldn't run if the object already has the desired state.
265+ name: Register the name of the function. If the name already exists, then the function will be replaced.
266+ """
267+ self ._fns [name ] = fn
268+ return self
269+
240270 def __iter__ (self ):
241271 return self
242272
@@ -255,4 +285,4 @@ def __next__(self):
255285 return self ._process (value )
256286
257287
258- LabelData = Union [LabelCollection , LabelGenerator ]
288+ LabelCollection = Union [LabelList , LabelGenerator ]
0 commit comments