
Commit dd4530a

Merge pull request #116 from Labelbox/ms/validation-part2
User input validation
2 parents 829884a + cc5b2cd commit dd4530a

10 files changed: 278 additions & 40 deletions


labelbox/schema/asset_metadata.py

Lines changed: 12 additions & 3 deletions
@@ -1,3 +1,5 @@
+from enum import Enum
+
 from labelbox.orm.db_object import DbObject
 from labelbox.orm.model import Field
 
@@ -9,9 +11,16 @@ class AssetMetadata(DbObject):
         meta_type (str): IMAGE, VIDEO, TEXT, or IMAGE_OVERLAY
         meta_value (str): URL to an external file or a string of text
     """
-    VIDEO = "VIDEO"
-    IMAGE = "IMAGE"
-    TEXT = "TEXT"
+
+    class MetaType(Enum):
+        VIDEO = "VIDEO"
+        IMAGE = "IMAGE"
+        TEXT = "TEXT"
+        IMAGE_OVERLAY = "IMAGE_OVERLAY"
+
+    # For backwards compatibility
+    for topic in MetaType:
+        vars()[topic.name] = topic.value
 
     meta_type = Field.String("meta_type")
     meta_value = Field.String("meta_value")
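
A minimal sketch of how the new MetaType enum and the backwards-compatibility shim behave (plain Python, assuming the labelbox package at this revision is installed):

from labelbox.schema.asset_metadata import AssetMetadata

# The enum is now the canonical list of supported metadata types.
print([t.value for t in AssetMetadata.MetaType])
# ['VIDEO', 'IMAGE', 'TEXT', 'IMAGE_OVERLAY']

# The backwards-compatibility loop copies each member onto the class,
# so existing code that referenced AssetMetadata.TEXT still resolves to "TEXT".
assert AssetMetadata.TEXT == AssetMetadata.MetaType.TEXT.value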

labelbox/schema/data_row.py

Lines changed: 14 additions & 2 deletions
@@ -1,7 +1,7 @@
 from labelbox.orm import query
 from labelbox.orm.db_object import DbObject, Updateable, BulkDeletable
 from labelbox.orm.model import Entity, Field, Relationship
-from labelbox.pagination import PaginatedCollection
+from labelbox.schema.asset_metadata import AssetMetadata
 
 
 class DataRow(DbObject, Updateable, BulkDeletable):
@@ -34,6 +34,10 @@ class DataRow(DbObject, Updateable, BulkDeletable):
     metadata = Relationship.ToMany("AssetMetadata", False, "metadata")
     predictions = Relationship.ToMany("Prediction", False)
 
+    supported_meta_types = {
+        meta_type.value for meta_type in AssetMetadata.MetaType
+    }
+
     @staticmethod
     def bulk_delete(data_rows):
         """ Deletes all the given DataRows.
@@ -55,11 +59,19 @@ def create_metadata(self, meta_type, meta_value):
 
         Args:
             meta_type (str): Asset metadata type, must be one of:
-                VIDEO, IMAGE, TEXT.
+                VIDEO, IMAGE, TEXT, IMAGE_OVERLAY (AssetMetadata.MetaType)
             meta_value (str): Asset metadata value.
         Returns:
             `AssetMetadata` DB object.
+        Raises:
+            ValueError: meta_type must be one of the supported types.
         """
+
+        if meta_type not in self.supported_meta_types:
+            raise ValueError(
+                f"meta_type must be one of {self.supported_meta_types}. Found {meta_type}"
+            )
+
         meta_type_param = "metaType"
         meta_value_param = "metaValue"
         data_row_id_param = "dataRowId"
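
A hedged usage sketch of the new create_metadata check; the Client setup, get_dataset call, and the "<dataset_uid>" string are placeholders for an existing workspace and are not part of this diff:

from labelbox import Client
from labelbox.schema.asset_metadata import AssetMetadata

client = Client()  # assumes an API key configured in the environment
dataset = client.get_dataset("<dataset_uid>")
data_row = next(iter(dataset.data_rows()))

# Valid: TEXT is one of DataRow.supported_meta_types.
data_row.create_metadata(AssetMetadata.MetaType.TEXT.value, "a note about this asset")

# Invalid: fails fast with ValueError before the mutation is built.
try:
    data_row.create_metadata("GIF", "https://example.com/some.gif")
except ValueError as e:
    print(e)  # meta_type must be one of {...}. Found GIF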

labelbox/schema/dataset.py

Lines changed: 32 additions & 5 deletions
@@ -1,11 +1,15 @@
+import os
 import json
+import logging
+from itertools import islice
 from multiprocessing.dummy import Pool as ThreadPool
-import os
 
 from labelbox.exceptions import InvalidQueryError, ResourceNotFoundError, InvalidAttributeError
 from labelbox.orm.db_object import DbObject, Updateable, Deletable
 from labelbox.orm.model import Entity, Field, Relationship
 
+logger = logging.getLogger(__name__)
+
 
 class Dataset(DbObject, Updateable, Deletable):
     """ A Dataset is a collection of DataRows.
@@ -163,12 +167,13 @@ def convert_item(item):
         task._user = user
         return task
 
-    def data_row_for_external_id(self, external_id):
+    def data_rows_for_external_id(self, external_id, limit=10):
         """ Convenience method for getting a single `DataRow` belonging to this
         `Dataset` that has the given `external_id`.
 
         Args:
             external_id (str): External ID of the sought `DataRow`.
+            limit (int): The maximum number of data rows to return for the given external_id
 
         Returns:
             A single `DataRow` with the given ID.
@@ -182,10 +187,32 @@ def data_row_for_external_id(self, external_id):
         where = DataRow.external_id == external_id
 
         data_rows = self.data_rows(where=where)
-        # Get at most two data_rows.
-        data_rows = [row for row, _ in zip(data_rows, range(2))]
+        # Get at most `limit` data_rows.
+        data_rows = list(islice(data_rows, limit))
 
-        if len(data_rows) != 1:
+        if not len(data_rows):
             raise ResourceNotFoundError(DataRow, where)
+        return data_rows
+
+    def data_row_for_external_id(self, external_id):
+        """ Convenience method for getting a single `DataRow` belonging to this
+        `Dataset` that has the given `external_id`.
 
+        Args:
+            external_id (str): External ID of the sought `DataRow`.
+
+        Returns:
+            A single `DataRow` with the given ID.
+
+        Raises:
+            labelbox.exceptions.ResourceNotFoundError: If there is no `DataRow`
+                in this `DataSet` with the given external ID, or if there are
+                multiple `DataRows` for it.
+        """
+        data_rows = self.data_rows_for_external_id(external_id=external_id,
+                                                   limit=2)
+        if len(data_rows) > 1:
+            logger.warning(
+                f"More than one data_row has the provided external_id : `%s`. Use function data_rows_for_external_id to fetch all",
                external_id)
         return data_rows[0]
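
A rough sketch of the split between data_rows_for_external_id and the old data_row_for_external_id; the dataset uid and external_id values are placeholders, and the Client setup is assumed:

from labelbox import Client

client = Client()  # assumes an API key configured in the environment
dataset = client.get_dataset("<dataset_uid>")

# New method: fetch every matching row up to `limit` instead of failing
# when an external_id maps to more than one DataRow.
rows = dataset.data_rows_for_external_id("image-001.jpg", limit=50)
print(len(rows))

# Old entry point still works; if several rows share the external_id it now
# logs a warning and returns the first match instead of raising.
row = dataset.data_row_for_external_id("image-001.jpg")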

labelbox/schema/project.py

Lines changed: 78 additions & 8 deletions
@@ -1,14 +1,15 @@
-from collections import namedtuple
-from datetime import datetime, timezone
 import json
+import time
 import logging
+from collections import namedtuple
+from datetime import datetime, timezone
 from pathlib import Path
-import time
 from typing import Dict, List, Union, Iterable
 from urllib.parse import urlparse
 
 from labelbox import utils
 from labelbox.schema.bulk_import_request import BulkImportRequest
+from labelbox.schema.data_row import DataRow
 from labelbox.exceptions import InvalidQueryError
 from labelbox.orm import query
 from labelbox.orm.db_object import DbObject, Updateable, Deletable
@@ -88,6 +89,9 @@ def create_label(self, **kwargs):
         # deprecated and we don't want the Py client lib user to know
         # about them. At the same time they're connected to a Label at
         # label creation in a non-standard way (connect via name).
+        logger.warning(
+            "`create_label` is deprecated and is not compatible with the new editor."
+        )
 
         Label = Entity.Label
 
@@ -196,7 +200,7 @@ def upsert_instructions(self, instructions_file: str):
         frontendId = frontend.uid
 
         if frontend.name != "Editor":
-            logger.warn(
+            logger.warning(
                 f"This function has only been tested to work with the Editor front end. Found %s",
                 frontend.name)
 
@@ -312,18 +316,66 @@ def setup(self, labeling_frontend, labeling_frontend_options):
         timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
         self.update(setup_complete=timestamp)
 
+    def validate_labeling_parameter_overrides(self, data):
+        for idx, row in enumerate(data):
+            if len(row) != 3:
+                raise TypeError(
+                    f"Data must be a list of tuples containing a DataRow, priority (int), num_labels (int). Found {len(row)} items. Index: {idx}"
+                )
+            data_row, priority, num_labels = row
+            if not isinstance(data_row, DataRow):
+                raise TypeError(
+                    f"data_row should be be of type DataRow. Found {type(data_row)}. Index: {idx}"
+                )
+
+            for name, value in [["Priority", priority],
+                                ["Number of labels", num_labels]]:
+                if not isinstance(value, int):
+                    raise TypeError(
+                        f"{name} must be an int. Found {type(value)} for data_row {data_row}. Index: {idx}"
+                    )
+                if value < 1:
+                    raise ValueError(
+                        f"{name} must be greater than 0 for data_row {data_row}. Index: {idx}"
+                    )
+
     def set_labeling_parameter_overrides(self, data):
         """ Adds labeling parameter overrides to this project.
-
+
+        See information on priority here:
+            https://docs.labelbox.com/en/configure-editor/queue-system#reservation-system
+
         >>> project.set_labeling_parameter_overrides([
         >>>     (data_row_1, 2, 3), (data_row_2, 1, 4)])
 
         Args:
             data (iterable): An iterable of tuples. Each tuple must contain
-                (DataRow, priority, numberOfLabels) for the new override.
+                (DataRow, priority<int>, number_of_labels<int>) for the new override.
+
+        Priority:
+            * Data will be labeled in priority order.
+                - A lower number priority is labeled first.
+                - Minimum priority is 1.
+            * Priority is not the queue position.
+                - The position is determined by the relative priority.
+                - E.g. [(data_row_1, 5,1), (data_row_2, 2,1), (data_row_3, 10,1)]
+                  will be assigned in the following order: [data_row_2, data_row_1, data_row_3]
+            * Datarows with parameter overrides will appear before datarows without overrides.
+            * The priority only effects items in the queue.
+                - Assigning a priority will not automatically add the item back into the queue.
+        Number of labels:
+            * The number of times a data row should be labeled.
+                - Creates duplicate data rows in a project (one for each number of labels).
+            * New duplicated data rows will be added to the queue.
+                - Already labeled duplicates will not be sent back to the queue.
+            * The queue will never assign the same datarow to a single labeler more than once.
+                - If the number of labels is greater than the number of labelers working on a project then
+                  the extra items will remain in the queue (this can be fixed by removing the override at any time).
+            * Setting this to 1 will result in the default behavior (no duplicates).
        Returns:
            bool, indicates if the operation was a success.
        """
+        self.validate_labeling_parameter_overrides(data)
         data_str = ",\n".join(
             "{dataRow: {id: \"%s\"}, priority: %d, numLabels: %d }" %
             (data_row.uid, priority, num_labels)
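
Before the remaining hunks, a short sketch of what validate_labeling_parameter_overrides now rejects when called through set_labeling_parameter_overrides; the project and dataset uids are placeholders and the Client setup is assumed:

from itertools import islice

from labelbox import Client

client = Client()  # assumes an API key configured in the environment
project = client.get_project("<project_uid>")
dataset = client.get_dataset("<dataset_uid>")
data_row_1, data_row_2 = islice(dataset.data_rows(), 2)

# Well-formed overrides: (DataRow, priority, number_of_labels) tuples.
project.set_labeling_parameter_overrides([(data_row_1, 2, 3), (data_row_2, 1, 4)])

# Each of these malformed inputs now fails before the mutation is sent.
bad_inputs = [
    [(data_row_1, 2)],           # wrong tuple length -> TypeError
    [(data_row_1.uid, 2, 3)],    # uid string instead of a DataRow -> TypeError
    [(data_row_1, 0, 3)],        # priority below the minimum of 1 -> ValueError
]
for data in bad_inputs:
    try:
        project.set_labeling_parameter_overrides(data)
    except (TypeError, ValueError) as e:
        print(type(e).__name__, e)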
@@ -338,6 +390,8 @@ def set_labeling_parameter_overrides(self, data):
     def unset_labeling_parameter_overrides(self, data_rows):
         """ Removes labeling parameter overrides to this project.
 
+        * This will remove unlabeled duplicates in the queue.
+
         Args:
             data_rows (iterable): An iterable of DataRows.
         Returns:
@@ -353,12 +407,19 @@ def unset_labeling_parameter_overrides(self, data_rows):
         return res["project"]["unsetLabelingParameterOverrides"]["success"]
 
     def upsert_review_queue(self, quota_factor):
-        """ Reinitiates the review queue for this project.
+        """ Sets the the proportion of total assets in a project to review.
+
+        More information can be found here:
+            https://docs.labelbox.com/en/quality-assurance/review-labels#configure-review-percentage
 
         Args:
             quota_factor (float): Which part (percentage) of the queue
                 to reinitiate. Between 0 and 1.
         """
+
+        if not 0. < quota_factor < 1.:
+            raise ValueError("Quota factor must be in the range of [0,1]")
+
         id_param = "projectId"
         quota_param = "quotaFactor"
         query_str = """mutation UpsertReviewQueuePyApi($%s: ID!, $%s: Float!){
@@ -373,7 +434,6 @@ def upsert_review_queue(self, quota_factor):
     def extend_reservations(self, queue_type):
         """ Extends all the current reservations for the current user on the given
         queue type.
-
         Args:
             queue_type (str): Either "LabelingQueue" or "ReviewQueue"
         Returns:
@@ -398,6 +458,11 @@ def create_prediction_model(self, name, version):
         Returns:
             A newly created PredictionModel.
         """
+
+        logger.warning(
+            "`create_prediction_model` is deprecated and is not compatible with the new editor."
+        )
+
         PM = Entity.PredictionModel
         model = self.client._create(PM, {
             PM.name.name: name,
@@ -423,6 +488,10 @@ def create_prediction(self, label, data_row, prediction_model=None):
             is None and this Project's active_prediction_model is also
             None.
         """
+        logger.warning(
+            "`create_prediction` is deprecated and is not compatible with the new editor."
+        )
+
         if prediction_model is None:
             prediction_model = self.active_prediction_model()
         if prediction_model is None:
@@ -495,6 +564,7 @@ def upload_annotations(
         Returns:
             BulkImportRequest
         """
+
         if isinstance(annotations, str) or isinstance(annotations, Path):
 
             def _is_url_valid(url: Union[str, Path]) -> bool:
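
Finally, a small sketch of the new quota_factor guard in upsert_review_queue; the project uid is a placeholder, and note the check is strict, so 0 and 1 themselves are also rejected:

from labelbox import Client

client = Client()  # assumes an API key configured in the environment
project = client.get_project("<project_uid>")

# Accepted: a value strictly between 0 and 1 reinitiates that share of the queue.
project.upsert_review_queue(0.25)

# Rejected before any request is made: values outside the open interval (0, 1).
try:
    project.upsert_review_queue(5)
except ValueError as e:
    print(e)  # Quota factor must be in the range of [0,1]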
