Add Benchmark

florijanstamenkovic · florijanstamenkovic · commit 0fa39ed4ed9a · 2019-10-16T07:41:04.000+02:00
diff --git a/labelbox/orm/query.py b/labelbox/orm/query.py
@@ -603,3 +603,20 @@ def project_review_metrics(project, net_score):
     }""" % (project_id_param, project_id_param, net_score_literal)
 
     return query_str, {project_id_param: project.uid}
+
+
+def create_benchmark(label):
+    label_id_param = "labelId"
+    query_str = """mutation CreateBenchmarkPyApi($%s: ID!) {
+        createBenchmark(data: {labelId: $%s}) {%s}} """ % (
+            label_id_param, label_id_param,
+            results_query_part(Entity.named("Benchmark")))
+    return query_str, {label_id_param: label.uid}
+
+
+def delete_benchmark(label):
+    label_id_param = "labelId"
+    query_str = """mutation DeleteBenchmarkPyApi($%s: ID!) {
+        deleteBenchmark(where: {labelId: $%s}) {id}} """ % (
+            label_id_param, label_id_param)
+    return query_str, {label_id_param: label.uid}
diff --git a/labelbox/schema.py b/labelbox/schema.py
@@ -44,6 +44,7 @@ class Project(DbObject, Updateable, Deletable):
     labeling_parameter_overrides = Relationship.ToMany(
         "LabelingParameterOverride", False, "labeling_parameter_overrides")
     webhooks = Relationship.ToMany("Webhook", False)
+    benchmarks = Relationship.ToMany("Benchmark", False)
 
     def create_label(self, **kwargs):
         """ Creates a label on this project.
@@ -408,6 +409,15 @@ def create_review(self, **kwargs):
         kwargs[Review.project.name] = self.project()
         return self.client._create(Review, kwargs)
 
+    def create_benchmark(self):
+        """ Creates a Benchmark for this Label.
+        Return:
+            The newly created Benchmark.
+        """
+        query_str, params = query.create_benchmark(self)
+        res = self.client.execute(query_str, params)
+        res = res["data"]["createBenchmark"]
+        return Benchmark(self.client, res)
 
 class Review(DbObject, Deletable, Updateable):
 
@@ -425,6 +435,26 @@ class NetScore(Enum):
     project = Relationship.ToOne("Project", False)
     label = Relationship.ToOne("Label", False)
 
+class Benchmark(DbObject):
+    """ Benchmarks (also known as Golden Standard) is a quality assurance tool
+    for training data. Training data quality is the measure of accuracy and
+    consistency of the training data. Benchmarks work by interspersing data
+    to be labeled, for which there is a benchmark label, to each person labeling.
+    These labeled data are compared against their respective benchmark and an
+    accuracy score between 0 and 100 percent is calculated.
+    """
+    created_at = Field.DateTime("created_at")
+    created_by = Relationship.ToOne("User", False, "created_by")
+    last_activity = Field.DateTime("last_activity")
+    average_agreement = Field.Float("average_agreement")
+    completed_count = Field.Int("completed_count")
+
+    reference_label = Relationship.ToOne("Label", False, "reference_label")
+
+    def delete(self):
+        query_str, params = query.delete_benchmark(self.reference_label())
+        self.client.execute(query_str, params)
+
 
 class AssetMetadata(DbObject):
     """ AssetMetadata is a datatype to provide extra context about an asset
diff --git a/tests/integration/test_benchmark.py b/tests/integration/test_benchmark.py
@@ -0,0 +1,27 @@
+IMG_URL = "https://picsum.photos/200/300"
+
+
+def test_benchmark(client, rand_gen):
+    project = client.create_project(name=rand_gen(str))
+    dataset = client.create_dataset(name=rand_gen(str), projects=project)
+    data_row = dataset.create_data_row(row_data=IMG_URL)
+    label = project.create_label(data_row=data_row, label="test",
+                                 seconds_to_label=0.0)
+    assert set(project.benchmarks()) == set()
+    assert label.is_benchmark_reference == False
+
+    benchmark = label.create_benchmark()
+    assert set(project.benchmarks()) == {benchmark}
+    assert benchmark.reference_label() == label
+    # Refresh label data to check its benchmark reference
+    label = list(data_row.labels())[0]
+    assert label.is_benchmark_reference == True
+
+    benchmark.delete()
+    assert set(project.benchmarks()) == set()
+    # Refresh label data to check its benchmark reference
+    label = list(data_row.labels())[0]
+    assert label.is_benchmark_reference == False
+
+    dataset.delete()
+    project.delete()