Skip to content

Commit 1697795

Browse files
authored
Merge pull request #736 from Labelbox/mkozik/batch-consensus-settings
[QQC-755] Allow to pass batch consensus settings when creating a batch
2 parents d29788c + 816b811 commit 1697795

File tree

8 files changed

+57
-13
lines changed

8 files changed

+57
-13
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
# In progress
44
## Changed
55
* Default behavior for metrics to not include subclasses in the calculation.
6+
* Updated `create_batch` method to accept consensus settings.
67

78
## Fixed
89
* Polygon extraction from masks creating invalid polygons. This would cause issues in the coco converter.

labelbox/schema/batch.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@ class Batch(DbObject):
3232
created_at = Field.DateTime("created_at")
3333
updated_at = Field.DateTime("updated_at")
3434
size = Field.Int("size")
35+
consensus_settings = Field.Json("consensus_settings_json")
3536

3637
# Relationships
3738
created_by = Relationship.ToOne("User")
labelbox/schema/consensus_settings.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from labelbox.utils import _CamelCaseMixin
2+
3+
4+
class ConsensusSettings(_CamelCaseMixin):
5+
"""Container for holding consensus quality settings
6+
7+
>>> ConsensusSettings(
8+
>>> number_of_labels = 2,
9+
>>> coverage_percentage = 0.2
10+
>>> )
11+
12+
Args:
13+
number_of_labels: Number of labels for consensus
14+
coverage_percentage: Percentage of data rows to be labeled more than once
15+
"""
16+
17+
number_of_labels: int
18+
coverage_percentage: float

labelbox/schema/data_row_metadata.py

Lines changed: 1 addition & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
from pydantic import BaseModel, conlist, constr
99

1010
from labelbox.schema.ontology import SchemaId
11-
from labelbox.utils import camel_case
11+
from labelbox.utils import _CamelCaseMixin
1212

1313

1414
class DataRowMetadataKind(Enum):
@@ -36,13 +36,6 @@ class DataRowMetadataSchema(BaseModel):
3636
String: Type[str] = constr(max_length=500)
3737

3838

39-
class _CamelCaseMixin(BaseModel):
40-
41-
class Config:
42-
allow_population_by_field_name = True
43-
alias_generator = camel_case
44-
45-
4639
# Metadata base class
4740
class DataRowMetadataField(_CamelCaseMixin):
4841
schema_id: SchemaId

labelbox/schema/project.py

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
from labelbox.orm.db_object import DbObject, Updateable, Deletable
1717
from labelbox.orm.model import Entity, Field, Relationship
1818
from labelbox.pagination import PaginatedCollection
19+
from labelbox.schema.consensus_settings import ConsensusSettings
1920
from labelbox.schema.media_type import MediaType
2021
from labelbox.schema.queue_mode import QueueMode
2122
from labelbox.schema.resource_tag import ResourceTag
@@ -561,14 +562,18 @@ def setup(self, labeling_frontend, labeling_frontend_options) -> None:
561562
timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
562563
self.update(setup_complete=timestamp)
563564

564-
def create_batch(self, name: str, data_rows: List[str], priority: int = 5):
565+
def create_batch(self,
566+
name: str,
567+
data_rows: List[str],
568+
priority: int = 5,
569+
consensus_settings: Optional[Dict[str, float]] = None):
565570
"""Create a new batch for a project. Batches is in Beta and subject to change
566571
567572
Args:
568573
name: a name for the batch, must be unique within a project
569574
data_rows: Either a list of `DataRows` or Data Row ids
570575
priority: An optional priority for the Data Rows in the Batch. 1 highest -> 5 lowest
571-
576+
consensus_settings: An optional dictionary with consensus settings: {'number_of_labels': 3, 'coverage_percentage': 0.1}
572577
"""
573578

574579
# @TODO: make this automatic?
@@ -600,12 +605,16 @@ def create_batch(self, name: str, data_rows: List[str], priority: int = 5):
600605
}
601606
""" % (method, method, query.results_query_part(Entity.Batch))
602607

608+
if consensus_settings:
609+
consensus_settings = ConsensusSettings(**consensus_settings).dict(
610+
by_alias=True)
603611
params = {
604612
"projectId": self.uid,
605613
"batchInput": {
606614
"name": name,
607615
"dataRowIds": dr_ids,
608-
"priority": priority
616+
"priority": priority,
617+
"consensusSettings": consensus_settings
609618
}
610619
}
611620

labelbox/utils.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import re
2+
from pydantic import BaseModel
23

34

45
def _convert(s, sep, title):
@@ -23,3 +24,10 @@ def title_case(s):
2324
def snake_case(s):
2425
""" Converts a string in [snake|camel|title]case to snake_case. """
2526
return _convert(s, "_", lambda i: False)
27+
28+
29+
class _CamelCaseMixin(BaseModel):
30+
31+
class Config:
32+
allow_population_by_field_name = True
33+
alias_generator = camel_case

tests/integration/conftest.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -177,7 +177,9 @@ def batch_project(client, rand_gen):
177177

178178
@pytest.fixture
179179
def consensus_project(client, rand_gen):
180-
project = client.create_project(name=rand_gen(str), auto_audit_percentage=0)
180+
project = client.create_project(name=rand_gen(str),
181+
auto_audit_percentage=0,
182+
queue_mode=QueueMode.Dataset)
181183
yield project
182184
project.delete()
183185

tests/integration/test_batch.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
import pytest
22

33
from labelbox import Dataset, Project
4-
from labelbox.schema.queue_mode import QueueMode
54

65
IMAGE_URL = "https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000000034.jpg"
76

@@ -39,6 +38,19 @@ def test_create_batch(batch_project: Project, big_dataset: Dataset):
3938
assert batch.size == len(data_rows)
4039

4140

41+
def test_create_batch_with_consensus_settings(batch_project: Project,
42+
big_dataset: Dataset):
43+
data_rows = [dr.uid for dr in list(big_dataset.export_data_rows())]
44+
consensus_settings = {"coverage_percentage": 0.1, "number_of_labels": 3}
45+
batch = batch_project.create_batch("batch with consensus settings",
46+
data_rows,
47+
3,
48+
consensus_settings=consensus_settings)
49+
assert batch.name == "batch with consensus settings"
50+
assert batch.size == len(data_rows)
51+
assert batch.consensus_settings == consensus_settings
52+
53+
4254
def test_archive_batch(batch_project: Project, small_dataset: Dataset):
4355
data_rows = [dr.uid for dr in list(small_dataset.export_data_rows())]
4456
batch = batch_project.create_batch("batch to archive", data_rows)

0 commit comments

Comments
 (0)