logger = logging.getLogger(__name__)


class LabelList:
    """
    A container for interacting with a collection of labels.
    Less memory efficient than LabelGenerator, but more performant and convenient to use.
    Use on smaller datasets.
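
    Example (an illustrative sketch; assumes ``labels`` is an iterable of
    ``Label`` objects built elsewhere, not defined in this module):

        label_list = LabelList(labels)
        for label in label_list:
            print(label.data.external_id)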
23- """
24-
    def __init__(self, data: Optional[Iterable[Label]] = None):
        warnings.warn("LabelList is deprecated and will be "
                      "removed in a future release.")

        if data is None:
            self._data = []
        elif isinstance(data, Label):
            self._data = [data]
        else:
            # Materialize the iterable so len(), indexing, and append() work.
            self._data = list(data)
        self._index = 0

    def assign_feature_schema_ids(
            self, ontology_builder: "ontology.OntologyBuilder") -> "LabelList":
        """
        Adds schema ids to all FeatureSchema objects in the Labels.

        Args:
            ontology_builder: The ontology that matches the feature names assigned to objects in this LabelList
        Returns:
            LabelList. Useful for chaining these modifying functions.

        Note: You can now import annotations using names directly without having to look up schema_ids.
        """
        warnings.warn("This method is deprecated and will be "
                      "removed in a future release. Feature schema ids"
                      " are no longer required for importing.")
        for label in self._data:
            label.assign_feature_schema_ids(ontology_builder)
        return self

    def add_to_dataset(self,
                       dataset: "Entity.Dataset",
                       signer: Callable[[bytes], str],
                       max_concurrency=20) -> "LabelList":
60- """
61- Creates data rows from each labels data object and attaches the data to the given dataset.
62- Updates the label's data object to have the same external_id and uid as the data row.
63- It is reccomended to create a new dataset if memory is a concern because all dataset data rows are exported to make this faster.
64- Also note that this relies on exported data that it cached.
65- So this will not work on the same dataset more frequently than every 30 min.
66- The workaround is creating a new dataset each time this function is used.
67-
68- Args:
69- dataset: labelbox dataset object to add the new data row to
70- signer: A function that accepts bytes and returns a signed url.
71- Returns:
72- LabelList with updated references to new data rows
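
        Example (an illustrative sketch; ``client`` and ``my_signer`` are
        placeholders for a ``labelbox.Client`` and a user-supplied signing
        function, neither of which is defined in this module):

            dataset = client.create_dataset(name="example-dataset")
            label_list = label_list.add_to_dataset(dataset, my_signer)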
73- """
        self._ensure_unique_external_ids()
        self.add_url_to_data(signer, max_concurrency=max_concurrency)
        upload_task = dataset.create_data_rows([{
            'row_data': label.data.url,
            'external_id': label.data.external_id
        } for label in self._data])
        upload_task.wait_till_done()

        data_row_lookup = {
            data_row.external_id: data_row.uid
            for data_row in dataset.export_data_rows()
        }
        for label in self._data:
            label.data.uid = data_row_lookup[label.data.external_id]
        return self

    def add_url_to_masks(self, signer, max_concurrency=20) -> "LabelList":
        """
        Creates signed urls for all masks in the LabelList.
        Multiple mask objects can reference the same MaskData, so this makes sure each url is only uploaded once.
        Only uploads a url if one doesn't already exist.

        Args:
            signer: A function that accepts bytes and returns a signed url.
            max_concurrency: how many threads to use for uploading.
                Should be balanced to match the signing service's capabilities.
        Returns:
            LabelList with updated references to the new mask urls
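
        Example (illustrative; ``my_signer`` is a placeholder for a
        user-supplied signing function):

            label_list = label_list.add_url_to_masks(my_signer, max_concurrency=10)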
102- """
        for _ in self._apply_threaded(
                [label.add_url_to_masks for label in self._data],
                max_concurrency, signer):
            # Drain the generator so every upload runs to completion.
            ...
        return self

    def add_url_to_data(self, signer, max_concurrency=20) -> "LabelList":
        """
        Creates signed urls for the data.
        Only uploads a url if one doesn't already exist.

        Args:
            signer: A function that accepts bytes and returns a signed url.
            max_concurrency: how many threads to use for uploading.
                Should be balanced to match the signing service's capabilities.
        Returns:
            LabelList with updated references to the new data urls
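
        Example (illustrative; ``my_signer`` is a placeholder, see the
        sketch of a local signer after this class):

            label_list = label_list.add_url_to_data(my_signer)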
120- """
        for _ in self._apply_threaded(
                [label.add_url_to_data for label in self._data],
                max_concurrency, signer):
            # Drain the generator so every upload runs to completion.
            ...
        return self

    def get_ontology(self) -> ontology.OntologyBuilder:
        """
        Builds an OntologyBuilder from the tools and classifications found
        on the labels in this LabelList.
        """
        classifications = []
        tools = []
        for label in self._data:
            tools = get_tools(label.object_annotations(), tools)
            classifications = get_classifications(
                label.classification_annotations(), classifications)
        return ontology.OntologyBuilder(tools=tools,
                                        classifications=classifications)

    def _ensure_unique_external_ids(self) -> None:
        external_ids = set()
        for label in self._data:
            if label.data.external_id is None:
                label.data.external_id = str(uuid4())
            else:
                if label.data.external_id in external_ids:
                    raise ValueError(
                        f"External ids must be unique for bulk uploading. Found {label.data.external_id} more than once."
                    )
            external_ids.add(label.data.external_id)

    def append(self, label: Label) -> None:
        self._data.append(label)

    def __iter__(self) -> "LabelList":
        self._index = 0
        return self

    def __next__(self) -> Label:
        if self._index == len(self._data):
            self._index = 0
            raise StopIteration

        value = self._data[self._index]
        self._index += 1
        return value

    def __len__(self) -> int:
        return len(self._data)

    def __getitem__(self, idx: int) -> Label:
        return self._data[idx]

    def _apply_threaded(self, fns, max_concurrency, *args):
        futures = []
        with ThreadPoolExecutor(max_workers=max_concurrency) as executor:
            for fn in fns:
                futures.append(executor.submit(fn, *args))
            for future in tqdm(as_completed(futures)):
                yield future.result()
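

# Illustrative sketch, not part of the library: the ``signer`` argument used
# above can be any callable that takes bytes and returns a reachable url.
# For local experiments, a stand-in could simply write the bytes to disk and
# return a file url; a real signer would upload the bytes to cloud storage
# and return a signed https url. The name below is hypothetical.
def example_local_signer(content: bytes) -> str:
    import tempfile

    handle = tempfile.NamedTemporaryFile(suffix=".bin", delete=False)
    handle.write(content)
    handle.close()
    return f"file://{handle.name}"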


class LabelGenerator(PrefetchGenerator):
    """
    A container for interacting with a large collection of labels.
    """

    def __init__(self, data: Generator[Label, None, None], *args, **kwargs):
        self._fns = {}
        super().__init__(data, *args, **kwargs)

    def as_list(self) -> "LabelList":
        warnings.warn("This method is deprecated and will be "
                      "removed in a future release. LabelList"
                      " class will be deprecated.")
        return LabelList(data=list(self))

    def assign_feature_schema_ids(
            self,
            ontology_builder: "ontology.OntologyBuilder") -> "LabelGenerator":
        ...  # body elided in this excerpt

    def add_to_dataset(self, dataset: "Entity.Dataset",
                       signer: Callable[[bytes], str],
                       max_concurrency=20) -> "LabelGenerator":
        """
        Creates data rows from each label's data object and attaches the data to the given dataset.
        Updates the label's data object to have the same external_id and uid as the data row.

        This is a lot slower than LabelList.add_to_dataset, but also more memory efficient.

        Args:
            dataset: labelbox dataset object to add the new data rows to
            signer: A function that accepts bytes and returns a signed url.