Skip to content

Commit ea34821

Browse files
author
Kevin Kim
committed
Add 'metadata' field representing data row metadata as DataRowMetadataFields
1 parent e6a19d4 commit ea34821

File tree

5 files changed: 111 additions and 48 deletions.

labelbox/orm/db_object.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,15 @@ def _set_field_values(self, field_values):
7070
"field %s", value, field)
7171
elif isinstance(field.field_type, Field.EnumType):
7272
value = field.field_type.enum_cls(value)
73+
elif isinstance(field.field_type,
74+
Field.ListType) and field.name == "metadata":
75+
mdo = self.client.get_data_row_metadata_ontology()
76+
try:
77+
value = mdo.parse_metadata_fields(value)
78+
except ValueError:
79+
logger.warning(
80+
"Failed to convert value '%s' to metadata for field %s",
81+
value, field)
7382
setattr(self, field.name, value)
7483

7584
def __repr__(self):

labelbox/schema/data_row.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,15 @@ class DataRow(DbObject, Updateable, BulkDeletable):
3636
created_at = Field.DateTime("created_at")
3737
media_attributes = Field.Json("media_attributes")
3838
metadata_fields = Field.List(
39-
DataRowMetadataField,
39+
dict,
4040
graphql_type="DataRowCustomMetadataUpsertInput!",
4141
name="metadata_fields",
4242
result_subquery="metadataFields { schemaId name value kind }")
43+
metadata = Field.List(DataRowMetadataField,
44+
graphql_type="DataRowCustomMetadataUpsertInput!",
45+
name="metadata",
46+
graphql_name="customMetadata",
47+
result_subquery="customMetadata { schemaId value }")
4348

4449
# Relationships
4550
dataset = Relationship.ToOne("Dataset")

labelbox/schema/data_row_metadata.py

Lines changed: 33 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# type: ignore
22
from datetime import datetime
3-
import warnings
43
from copy import deepcopy
54
from enum import Enum
65
from itertools import chain
@@ -224,34 +223,43 @@ def parse_metadata(
224223

225224
for dr in unparsed:
226225
fields = []
227-
for f in dr["fields"]:
228-
if f["schemaId"] not in self.fields_by_id:
229-
# Update metadata ontology if field can't be found
230-
self.refresh_ontology()
231-
if f["schemaId"] not in self.fields_by_id:
232-
raise ValueError(
233-
f"Schema Id `{f['schemaId']}` not found in ontology"
234-
)
235-
236-
schema = self.fields_by_id[f["schemaId"]]
237-
if schema.kind == DataRowMetadataKind.enum:
238-
continue
239-
elif schema.kind == DataRowMetadataKind.option:
240-
field = DataRowMetadataField(schema_id=schema.parent,
241-
value=schema.uid)
242-
elif schema.kind == DataRowMetadataKind.datetime:
243-
field = DataRowMetadataField(
244-
schema_id=schema.uid,
245-
value=datetime.fromisoformat(f["value"][:-1] +
246-
"+00:00"))
247-
else:
248-
field = DataRowMetadataField(schema_id=schema.uid,
249-
value=f["value"])
250-
fields.append(field)
226+
if "fields" in dr:
227+
fields = self.parse_metadata_fields(dr["fields"])
251228
parsed.append(
252229
DataRowMetadata(data_row_id=dr["dataRowId"], fields=fields))
253230
return parsed
254231

232+
def parse_metadata_fields(
        self, unparsed: List[Dict[str,
                                  Dict]]) -> List[DataRowMetadataField]:
    """Convert raw metadata field dictionaries into DataRowMetadataField objects.

    Args:
        unparsed: A list of dictionaries. Every entry must carry a
            "schemaId" key; a "value" key is read for every schema kind
            except ``enum`` and ``option``.

    Returns:
        The parsed fields, in input order. Entries whose schema kind is
        ``enum`` are skipped, and ``option`` entries are emitted as a field
        whose schema id is the option's parent and whose value is the
        option's own uid.

    Raises:
        ValueError: If a single dictionary is passed instead of a list, or
            if a schema id is still unknown after refreshing the ontology.
    """
    # Reject the common mistake of passing one field dict instead of a list
    # before doing any other work.
    if isinstance(unparsed, dict):
        raise ValueError("Pass a list of dictionaries")

    parsed = []
    for f in unparsed:
        schema_id = f["schemaId"]
        if schema_id not in self.fields_by_id:
            # Update metadata ontology if field can't be found
            self.refresh_ontology()
            if schema_id not in self.fields_by_id:
                raise ValueError(
                    f"Schema Id `{schema_id}` not found in ontology")

        schema = self.fields_by_id[schema_id]
        if schema.kind == DataRowMetadataKind.enum:
            continue
        elif schema.kind == DataRowMetadataKind.option:
            field = DataRowMetadataField(schema_id=schema.parent,
                                         value=schema.uid)
        elif schema.kind == DataRowMetadataKind.datetime:
            raw_value = f["value"]
            # Only a trailing "Z" is rewritten to an explicit UTC offset.
            # Blindly dropping the last character would corrupt timestamps
            # that already carry an offset or have no suffix at all.
            if raw_value.endswith("Z"):
                raw_value = raw_value[:-1] + "+00:00"
            field = DataRowMetadataField(
                schema_id=schema.uid,
                value=datetime.fromisoformat(raw_value))
        else:
            field = DataRowMetadataField(schema_id=schema.uid,
                                         value=f["value"])
        parsed.append(field)
    return parsed
262+
255263
def bulk_upsert(
256264
self, metadata: List[DataRowMetadata]
257265
) -> List[DataRowMetadataBatchResponse]:

tests/integration/test_data_row_metadata.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import time
21
from datetime import datetime
32

43
import pytest
@@ -282,3 +281,34 @@ def test_parse_raw_metadata(mdo):
282281
for row in parsed:
283282
for field in row.fields:
284283
assert mdo._parse_upsert(field)
284+
285+
286+
def test_parse_raw_metadata_fields(mdo):
    """Check that raw field dicts parse into upsertable metadata fields.

    Five raw entries are supplied and four parsed fields are expected back.
    NOTE(review): presumably one of the schema ids is an enum-kind schema
    that the parser skips; confirm against the fixture ontology.
    """
    # (schema id, raw value) pairs, expanded below into the wire format.
    raw_pairs = [
        ('cko8s9r5v0001h2dk9elqdidh', 'my-new-message'),
        ('cko8sbczn0002h2dkdaxb5kal', {}),
        ('cko8sbscr0003h2dk04w86hof', {}),
        ('cko8sdzv70006h2dk8jg64zvb', '2021-07-20T21:41:14.606710Z'),
        (FAKE_SCHEMA_ID, 0.5),
    ]
    example = [{
        'schemaId': schema_id,
        'value': raw_value
    } for schema_id, raw_value in raw_pairs]

    parsed = mdo.parse_metadata_fields(example)
    assert len(parsed) == 4

    # Every parsed field must be convertible into an upsert payload.
    assert all(mdo._parse_upsert(parsed_field) for parsed_field in parsed)

tests/integration/test_data_rows.py

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
1-
import imghdr
21
from tempfile import NamedTemporaryFile
32
import uuid
4-
import time
53
from datetime import datetime
64

75
import pytest
@@ -22,6 +20,14 @@
2220
].sort()
2321

2422

23+
@pytest.fixture
def mdo(client):
    """Yield the client's data row metadata ontology after a forced rebuild.

    The raw ontology is re-fetched and the derived structures are rebuilt
    before the object is handed to the test.
    """
    ontology = client.get_data_row_metadata_ontology()
    # Re-fetch the raw ontology, then rebuild from it.
    ontology._raw_ontology = ontology._get_ontology()
    ontology._build_ontology()
    yield ontology
29+
30+
2531
def make_metadata_fields():
2632
embeddings = [0.0] * 128
2733
msg = "A message"
@@ -57,12 +63,6 @@ def make_metadata_fields_dict():
5763
return fields
5864

5965

60-
def filter_precomputed_embeddings(metadata_fields):
61-
return list(
62-
filter(lambda md: md["name"] != "precomputedImageEmbedding",
63-
metadata_fields))
64-
65-
6666
def test_get_data_row(datarow, client):
6767
assert client.get_data_row(datarow.uid)
6868

@@ -235,7 +235,7 @@ def test_create_data_row_with_invalid_input(dataset, image_url):
235235
dataset.create_data_row(dr, row_data=image_url)
236236

237237

238-
def test_create_data_row_with_metadata(dataset, image_url):
238+
def test_create_data_row_with_metadata(mdo, dataset, image_url):
239239
client = dataset.client
240240
assert len(list(dataset.data_rows())) == 0
241241

@@ -249,13 +249,17 @@ def test_create_data_row_with_metadata(dataset, image_url):
249249
assert requests.get(image_url).content == \
250250
requests.get(data_row.row_data).content
251251
assert data_row.media_attributes is not None
252-
filtered_md_fields = filter_precomputed_embeddings(data_row.metadata_fields)
253-
assert len(filtered_md_fields) == 4
254-
assert [m["schemaId"] for m in filtered_md_fields
252+
metadata_fields = data_row.metadata_fields
253+
metadata = data_row.metadata
254+
assert len(metadata_fields) == 4
255+
assert len(metadata) == 4
256+
assert [m["schemaId"] for m in metadata_fields
255257
].sort() == EXPECTED_METADATA_SCHEMA_IDS
258+
for m in metadata:
259+
assert mdo._parse_upsert(m)
256260

257261

258-
def test_create_data_row_with_metadata_dict(dataset, image_url):
262+
def test_create_data_row_with_metadata_dict(mdo, dataset, image_url):
259263
client = dataset.client
260264
assert len(list(dataset.data_rows())) == 0
261265

@@ -269,10 +273,14 @@ def test_create_data_row_with_metadata_dict(dataset, image_url):
269273
assert requests.get(image_url).content == \
270274
requests.get(data_row.row_data).content
271275
assert data_row.media_attributes is not None
272-
filtered_md_fields = filter_precomputed_embeddings(data_row.metadata_fields)
273-
assert len(filtered_md_fields) == 4
274-
assert [m["schemaId"] for m in filtered_md_fields
276+
metadata_fields = data_row.metadata_fields
277+
metadata = data_row.metadata
278+
assert len(metadata_fields) == 4
279+
assert len(metadata) == 4
280+
assert [m["schemaId"] for m in metadata_fields
275281
].sort() == EXPECTED_METADATA_SCHEMA_IDS
282+
for m in metadata:
283+
assert mdo._parse_upsert(m)
276284

277285

278286
def test_create_data_row_with_invalid_metadata(dataset, image_url):
@@ -284,7 +292,7 @@ def test_create_data_row_with_invalid_metadata(dataset, image_url):
284292
dataset.create_data_row(row_data=image_url, metadata_fields=fields)
285293

286294

287-
def test_create_data_rows_with_metadata(dataset, image_url):
295+
def test_create_data_rows_with_metadata(mdo, dataset, image_url):
288296
client = dataset.client
289297
assert len(list(dataset.data_rows())) == 0
290298

@@ -322,11 +330,14 @@ def test_create_data_rows_with_metadata(dataset, image_url):
322330
requests.get(row.row_data).content
323331
assert row.media_attributes is not None
324332

325-
# Remove 'precomputedImageEmbedding' metadata if automatically added
326-
filtered_md_fields = filter_precomputed_embeddings(row.metadata_fields)
327-
assert len(filtered_md_fields) == 4
328-
assert [m["schemaId"] for m in filtered_md_fields
333+
metadata_fields = row.metadata_fields
334+
metadata = row.metadata
335+
assert len(metadata_fields) == 4
336+
assert len(metadata) == 4
337+
assert [m["schemaId"] for m in metadata_fields
329338
].sort() == EXPECTED_METADATA_SCHEMA_IDS
339+
for m in metadata:
340+
assert mdo._parse_upsert(m)
330341

331342

332343
def test_create_data_rows_with_invalid_metadata(dataset, image_url):

0 commit comments

Comments
 (0)