Skip to content

Commit 1a7c21f

Browse files
author
Matt Sokoloff
committed
added docstrings
1 parent b108e31 commit 1a7c21f

File tree

31 files changed

+542
-564
lines changed

31 files changed

+542
-564
lines changed

labelbox/data/annotation_types/annotation.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,51 @@
11
from typing import Any, Dict, List, Union
22

33
from .classification import Checklist, Dropdown, Radio, Text
4-
from .feature import FeatureSchemaRef
4+
from .feature import FeatureSchema
55
from .geometry import Geometry
66
from .ner import TextEntity
77

88

9-
class BaseAnnotation(FeatureSchemaRef):
9+
class BaseAnnotation(FeatureSchema):
10+
""" Base annotation class. Shouldn't be directly instantiated
11+
"""
1012
classifications: List["ClassificationAnnotation"] = []
1113
extra: Dict[str, Any] = {}
1214

1315

1416
class ObjectAnnotation(BaseAnnotation):
17+
"""Class representing objects annotations (non classifications or annotations that have a location)
18+
"""
1519
value: Union[TextEntity, Geometry]
1620

1721

1822
class ClassificationAnnotation(BaseAnnotation):
23+
"""Class represneting classification annotations (annotations that don't have a location) """
1924
value: Union[Text, Checklist, Radio, Dropdown]
2025

2126

2227
ClassificationAnnotation.update_forward_refs()
2328

2429

2530
class VideoObjectAnnotation(ObjectAnnotation):
31+
"""
32+
Class for video object annotations
33+
34+
Args:
35+
frame: The frame index that this annotation corresponds to
36+
keyframe: Whether this annotation was human generated or interpolated
37+
"""
2638
frame: int
2739
keyframe: bool
2840

2941

3042
class VideoClassificationAnnotation(ClassificationAnnotation):
43+
"""
44+
Class for video classification annotations
45+
46+
Args:
47+
frame: The frame index that this annotation corresponds to
48+
"""
3149
frame: int
3250

3351

labelbox/data/annotation_types/classification/classification.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,36 @@
22

33
from pydantic.main import BaseModel
44

5-
from ..feature import FeatureSchemaRef
5+
from ..feature import FeatureSchema
66

77

8-
class ClassificationAnswer(FeatureSchemaRef):
8+
class ClassificationAnswer(FeatureSchema):
9+
"""
10+
- Represents a classification option.
11+
- Because it inherits from FeatureSchema
12+
the option can be represented with either the name or schema_id
13+
"""
914
extra: Dict[str, Any] = {}
1015

1116

1217
class Radio(BaseModel):
18+
""" A classification with only one selected option allowed """
1319
answer: ClassificationAnswer
1420

1521

1622
class Checklist(BaseModel):
23+
""" A classification with many selected options allowed """
1724
answer: List[ClassificationAnswer]
1825

1926

2027
class Text(BaseModel):
28+
""" Free form text """
2129
answer: str
2230

2331

2432
class Dropdown(BaseModel):
33+
"""
34+
- A classification with many selected options allowed.
35+
- This is not currently compatible with MAL.
36+
"""
2537
answer: List[ClassificationAnswer]

labelbox/data/annotation_types/collection.py

Lines changed: 117 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -15,64 +15,47 @@
1515

1616
class LabelCollection:
1717
"""
18-
A container for
19-
18+
A container for interacting with a collection of labels.
19+
Less memory efficient than LabelGenerator but more performant and convenient to use.
20+
Use on smaller datasets.
2021
"""
2122

2223
def __init__(self, data: Iterable[Label]):
2324
self._data = data
2425
self._index = 0
2526

26-
def __iter__(self):
27-
self._index = 0
28-
return self
29-
30-
def __next__(self) -> Label:
31-
if self._index == len(self._data):
32-
raise StopIteration
33-
34-
value = self._data[self._index]
35-
self._index += 1
36-
return value
37-
38-
def __len__(self) -> int:
39-
return len(self._data)
40-
41-
def __getitem__(self, idx: int) -> Label:
42-
return self._data[idx]
43-
4427
def assign_schema_ids(
4528
self, ontology_builder: OntologyBuilder) -> "LabelCollection":
4629
"""
47-
Based on an ontology:
48-
- Checks to make sure that the feature names exist in the ontology
49-
- Updates the names to match the ontology.
30+
Adds schema ids to all FeatureSchema objects in the Labels.
31+
This is necessary for MAL.
32+
33+
Args:
34+
ontology_builder: The ontology that matches the feature names assigned to objects in this LabelCollection
35+
Returns:
36+
LabelCollection. Useful for chaining these modifying functions
5037
"""
5138
for label in self._data:
5239
label.assign_schema_ids(ontology_builder)
5340
return self
5441

55-
def _ensure_unique_external_ids(self) -> None:
56-
external_ids = set()
57-
for label in self._data:
58-
if label.data.external_id is None:
59-
label.data.external_id = uuid4()
60-
else:
61-
if label.data.external_id in external_ids:
62-
raise ValueError(
63-
f"External ids must be unique for bulk uploading. Found {label.data.external_id} more than once."
64-
)
65-
external_ids.add(label.data.external_id)
66-
6742
def add_to_dataset(self,
68-
dataset,
69-
signer,
43+
dataset: "Entity.Dataset",
44+
signer: Callable[[bytes], str],
7045
max_concurrency=20) -> "LabelCollection":
7146
"""
72-
# It is reccomended to create a new dataset if memory is a concern
73-
# Also note that this relies on exported data that it cached.
74-
# So this will not work on the same dataset more frequently than every 30 min.
75-
# The workaround is creating a new dataset
47+
Creates data rows from each labels data object and attaches the data to the given dataset.
48+
Updates the label's data object to have the same external_id and uid as the data row.
49+
It is recommended to create a new dataset if memory is a concern because all dataset data rows are exported to make this faster.
50+
Also note that this relies on exported data that is cached.
51+
So this will not work on the same dataset more frequently than every 30 min.
52+
The workaround is creating a new dataset each time this function is used.
53+
54+
Args:
55+
dataset: labelbox dataset object to add the new data row to
56+
signer: A function that accepts bytes and returns a signed url.
57+
Returns:
58+
LabelCollection with updated references to new data rows
7659
"""
7760
self._ensure_unique_external_ids()
7861
self.add_url_to_data(signer, max_concurrency=max_concurrency)
@@ -92,8 +75,16 @@ def add_to_dataset(self,
9275

9376
def add_url_to_masks(self, signer, max_concurrency=20) -> "LabelCollection":
9477
"""
95-
Creates a data row id for each data row that needs it. If the data row exists then it skips the row.
96-
TODO: Add error handling..
78+
Creates signed urls for all masks in the LabelCollection.
79+
Multiple masks can reference the same RasterData mask so this makes sure we only upload that url once.
80+
Only uploads url if one doesn't already exist.
81+
82+
Args:
83+
signer: A function that accepts bytes and returns a signed url.
84+
max_concurrency: how many threads to use for uploading.
85+
Should be balanced to match the signing service's capabilities.
86+
Returns:
87+
LabelCollection with updated references to the new mask urls
9788
"""
9889
for row in self._apply_threaded(
9990
[label.add_url_to_masks for label in self._data], max_concurrency,
@@ -103,14 +94,52 @@ def add_url_to_masks(self, signer, max_concurrency=20) -> "LabelCollection":
10394

10495
def add_url_to_data(self, signer, max_concurrency=20) -> "LabelCollection":
10596
"""
106-
TODO: Add error handling..
97+
Creates signed urls for the data
98+
Only uploads url if one doesn't already exist.
99+
100+
Args:
101+
signer: A function that accepts bytes and returns a signed url.
102+
max_concurrency: how many threads to use for uploading.
103+
Should be balanced to match the signing services capabilities.
104+
Returns:
105+
LabelCollection with updated references to the new data urls
107106
"""
108107
for row in self._apply_threaded(
109108
[label.add_url_to_data for label in self._data], max_concurrency,
110109
signer):
111110
...
112111
return self
113112

113+
def _ensure_unique_external_ids(self) -> None:
114+
external_ids = set()
115+
for label in self._data:
116+
if label.data.external_id is None:
117+
label.data.external_id = uuid4()
118+
else:
119+
if label.data.external_id in external_ids:
120+
raise ValueError(
121+
f"External ids must be unique for bulk uploading. Found {label.data.external_id} more than once."
122+
)
123+
external_ids.add(label.data.external_id)
124+
125+
def __iter__(self) -> "LabelCollection":
126+
self._index = 0
127+
return self
128+
129+
def __next__(self) -> Label:
130+
if self._index == len(self._data):
131+
raise StopIteration
132+
133+
value = self._data[self._index]
134+
self._index += 1
135+
return value
136+
137+
def __len__(self) -> int:
138+
return len(self._data)
139+
140+
def __getitem__(self, idx: int) -> Label:
141+
return self._data[idx]
142+
114143
def _apply_threaded(self, fns, max_concurrency, *args):
115144
futures = []
116145
with ThreadPoolExecutor(max_workers=max_concurrency) as executor:
@@ -122,6 +151,8 @@ def _apply_threaded(self, fns, max_concurrency, *args):
122151

123152
class LabelGenerator(PrefetchGenerator):
124153
"""
154+
A container for interacting with a collection of labels.
155+
125156
Use this class if you have larger data. It is slightly harder to work with
126157
than the LabelCollection but will be much more memory efficient.
127158
"""
@@ -130,14 +161,6 @@ def __init__(self, data: Generator[Label, None, None], *args, **kwargs):
130161
self._fns = {}
131162
super().__init__(data, *args, **kwargs)
132163

133-
def __iter__(self):
134-
return self
135-
136-
def process(self, value):
137-
for fn in self._fns.values():
138-
value = fn(value)
139-
return value
140-
141164
def as_collection(self) -> "LabelCollection":
142165
return LabelCollection(data=list(self))
143166

@@ -154,8 +177,13 @@ def _assign_ids(label: Label):
154177
def add_url_to_data(self, signer: Callable[[bytes],
155178
str]) -> "LabelGenerator":
156179
"""
157-
Updates masks to have `url` attribute
158-
Doesn't update masks that already have urls
180+
Creates signed urls for the data
181+
Only uploads url if one doesn't already exist.
182+
183+
Args:
184+
signer: A function that accepts bytes and returns a signed url.
185+
Returns:
186+
LabelGenerator that signs urls as data is accessed
159187
"""
160188

161189
def _add_url_to_data(label: Label):
@@ -165,8 +193,20 @@ def _add_url_to_data(label: Label):
165193
self._fns['_add_url_to_data'] = _add_url_to_data
166194
return self
167195

168-
def add_to_dataset(self, dataset,
196+
def add_to_dataset(self, dataset: "Entity.Dataset",
169197
signer: Callable[[bytes], str]) -> "LabelGenerator":
198+
"""
199+
Creates data rows from each labels data object and attaches the data to the given dataset.
200+
Updates the label's data object to have the same external_id and uid as the data row.
201+
202+
This is a lot slower than LabelCollection.add_to_dataset but also more memory efficient.
203+
204+
Args:
205+
dataset: labelbox dataset object to add the new data row to
206+
signer: A function that accepts bytes and returns a signed url.
207+
Returns:
208+
LabelGenerator that updates references to the new data rows as data is accessed
209+
"""
170210

171211
def _add_to_dataset(label: Label):
172212
label.create_data_row(dataset, signer)
@@ -178,8 +218,16 @@ def _add_to_dataset(label: Label):
178218
def add_url_to_masks(self, signer: Callable[[bytes],
179219
str]) -> "LabelGenerator":
180220
"""
181-
Updates masks to have `url` attribute
182-
Doesn't update masks that already have urls
221+
Creates signed urls for all masks in the LabelGenerator.
222+
Multiple masks can reference the same RasterData mask so this makes sure we only upload that url once.
223+
Only uploads url if one doesn't already exist.
224+
225+
Args:
226+
signer: A function that accepts bytes and returns a signed url.
227+
max_concurrency: how many threads to use for uploading.
228+
Should be balanced to match the signing service's capabilities.
229+
Returns:
230+
LabelGenerator that updates references to the new mask urls as data is accessed
183231
"""
184232

185233
def _add_url_to_masks(label: Label):
@@ -189,14 +237,22 @@ def _add_url_to_masks(label: Label):
189237
self._fns['add_url_to_masks'] = _add_url_to_masks
190238
return self
191239

240+
def __iter__(self):
241+
return self
242+
243+
def _process(self, value):
244+
for fn in self._fns.values():
245+
value = fn(value)
246+
return value
247+
192248
def __next__(self):
193249
"""
194-
- Double check that all values have been set.
195-
- Items could have been processed before any of these modifying functions are called.
196-
- None of these functions do anything if run more than once so the cost is minimal.
250+
Double checks that all values have been set.
251+
Items could have been processed before any of these modifying functions are called.
252+
None of these functions do anything if run more than once so the cost is minimal.
197253
"""
198254
value = super().__next__()
199-
return self.process(value)
255+
return self._process(value)
200256

201257

202258
LabelData = Union[LabelCollection, LabelGenerator]

labelbox/data/annotation_types/data/base_data.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,9 @@
44

55

66
class BaseData(BaseModel):
7+
"""
8+
Base class for objects representing data.
9+
This class shouldn't be used directly
10+
"""
711
external_id: Optional[str] = None
812
uid: Optional[str] = None

0 commit comments

Comments
 (0)