Skip to content

Commit 82468f1

Browse files
author
gdj0nes
committed
Update tests for enum delete
1 parent 38fa90d commit 82468f1

File tree

2 files changed

+131
-32
lines changed

2 files changed

+131
-32
lines changed

labelbox/schema/data_row_metadata.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,25 @@ def _batch_upsert(
275275
def bulk_delete(
276276
self, deletes: List[DeleteDataRowMetadata]
277277
) -> List[DataRowMetadataBatchResponse]:
278+
""" Delete metadata from a datarow by specifying the fields you want to remove
279+
280+
>>> delete = DeleteDataRowMetadata(
281+
>>> data_row_id="datarow-id",
282+
>>> fields=[
283+
>>> "schema-id-1",
284+
>>> "schema-id-2"
285+
>>> ...
286+
>>> ]
287+
>>> )
288+
>>> mdo.batch_delete([metadata])
289+
290+
291+
Args:
292+
deletes: list of `DeleteDataRowMetadata`, each naming a data row id and the schema ids to remove from it
293+
294+
Returns: list of `DataRowMetadataBatchResponse` describing the outcome of each batch
295+
296+
"""
278297

279298
if not len(deletes):
280299
raise ValueError("Empty list passed")
@@ -316,10 +335,10 @@ def _parse_upsert(
316335
parsed = _validate_parse_datetime(metadatum)
317336
elif schema.kind == DataRowMetadataKind.string:
318337
parsed = _validate_parse_text(metadatum)
319-
elif schema.kind == DataRowMetadataKind.enum:
320-
parsed = _validate_enum_parse(schema, metadatum)
321338
elif schema.kind == DataRowMetadataKind.embedding:
322339
parsed = _validate_parse_embedding(metadatum)
340+
elif schema.kind == DataRowMetadataKind.enum:
341+
parsed = _validate_enum_parse(schema, metadatum)
323342
elif schema.kind == DataRowMetadataKind.option:
324343
raise ValueError("An option id should not be as a schema id")
325344
else:
@@ -332,18 +351,22 @@ def _validate_delete(self, delete: DeleteDataRowMetadata):
332351
if not len(delete.fields):
333352
raise ValueError(f"No fields specified for {delete.data_row_id}")
334353

354+
deletes = set()
335355
for schema_id in delete.fields:
336356
if schema_id not in self.all_fields_id_index:
337357
raise ValueError(
338358
f"Schema Id `{schema_id}` not found in ontology")
339359

340360
schema = self.all_fields_id_index[schema_id]
341-
# TODO: change server implementation to delete by parent only
342-
# if schema.kind == DataRowMetadataKind.option:
343-
# raise ValueError("Specify the parent to remove an option")
361+
# handle users specifying enums by adding all option enums
362+
if schema.kind == DataRowMetadataKind.enum:
363+
[deletes.add(o.id) for o in schema.options]
364+
365+
deletes.add(schema.id)
344366

345367
return _DeleteBatchDataRowMetadata(dataRowId=delete.data_row_id,
346-
schemaIds=delete.fields).dict()
368+
schemaIds=list(
369+
delete.fields)).dict()
347370

348371

349372
def _batch_items(iterable, size):

tests/integration/test_data_row_metadata.py

Lines changed: 102 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717

1818
@pytest.fixture
19-
def dr_md_ontology(client):
19+
def mdo(client):
2020
yield client.get_data_row_metadata_ontology()
2121

2222

@@ -52,8 +52,7 @@ def make_metadata(dr_id) -> DataRowMetadata:
5252
return metadata
5353

5454

55-
def test_get_datarow_metadata_ontology(dr_md_ontology):
56-
mdo = dr_md_ontology
55+
def test_get_datarow_metadata_ontology(mdo):
5756
assert len(mdo.all_fields)
5857
assert len(mdo.reserved_fields)
5958
assert len(mdo.custom_fields) == 0
@@ -66,61 +65,114 @@ def test_get_datarow_metadata(datarow):
6665
assert len(md)
6766

6867

69-
def test_bulk_upsert_datarow_metadata(datarow, dr_md_ontology):
68+
def test_bulk_upsert_datarow_metadata(datarow, mdo):
7069
assert not len(datarow.metadata["fields"])
7170
metadata = make_metadata(datarow.uid)
72-
dr_md_ontology.bulk_upsert([metadata])
71+
mdo.bulk_upsert([metadata])
7372
assert len(datarow.metadata["fields"])
7473

7574

7675
@pytest.mark.slow
77-
def test_large_bulk_upsert_datarow_metadata(big_dataset, dr_md_ontology):
76+
def test_large_bulk_upsert_datarow_metadata(big_dataset, mdo):
7877
metadata = []
7978
for dr in big_dataset.export_data_rows():
8079
metadata.append(make_metadata(dr.uid))
81-
response = dr_md_ontology.bulk_upsert(metadata)
80+
response = mdo.bulk_upsert(metadata)
8281
assert response
8382

8483
for dr in big_dataset.export_data_rows():
8584
assert len(dr.metadata["fields"])
8685
break
8786

8887

89-
def test_bulk_delete_datarow_metadata(datarow: DataRow, dr_md_ontology):
90-
"""test bulk deletes for non non fields"""
88+
def test_bulk_delete_datarow_metadata(datarow, mdo):
89+
"""test bulk deletes for all fields
90+
91+
TODO: this fails because of the enum validation issue
92+
93+
"""
9194
assert not len(datarow.metadata["fields"])
95+
9296
metadata = make_metadata(datarow.uid)
93-
metadata.fields = [
94-
m for m in metadata.fields if m.schema_id != SPLIT_SCHEMA_ID
95-
]
96-
dr_md_ontology.bulk_upsert([metadata])
97+
mdo.bulk_upsert([metadata])
98+
9799
assert len(datarow.metadata["fields"])
98100

99-
dr_md_ontology.bulk_delete([
101+
mdo.bulk_delete([
100102
DeleteDataRowMetadata(data_row_id=datarow.uid,
101103
fields=[m.schema_id for m in metadata.fields])
102104
])
103-
assert not (len(datarow.metadata["fields"]))
105+
106+
assert not len(datarow.metadata["fields"])
107+
108+
109+
def test_bulk_partial_delete_datarow_metadata(datarow, mdo):
110+
"""Delete a single field from metadata"""
111+
assert not len(datarow.metadata["fields"])
112+
113+
metadata = make_metadata(datarow.uid)
114+
mdo.bulk_upsert([metadata])
115+
116+
assert len(datarow.metadata["fields"])
117+
118+
mdo.bulk_delete([
119+
DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[TEXT_SCHEMA_ID])
120+
])
121+
122+
assert len(datarow.metadata["fields"]) == 4
123+
124+
125+
@pytest.mark.slow
126+
def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
127+
metadata = []
128+
for dr in big_dataset.export_data_rows():
129+
metadata.append(
130+
DataRowMetadata(data_row_id=dr.uid,
131+
fields=[
132+
DataRowMetadataField(
133+
schema_id=EMBEDDING_SCHEMA_ID,
134+
value=[0.1] * 128),
135+
DataRowMetadataField(schema_id=TEXT_SCHEMA_ID,
136+
value="test-message")
137+
]))
138+
response = mdo.bulk_upsert(metadata)
139+
assert response
140+
141+
deletes = []
142+
for dr in big_dataset.export_data_rows():
143+
deletes.append(
144+
DeleteDataRowMetadata(
145+
data_row_id=dr.uid,
146+
fields=[
147+
EMBEDDING_SCHEMA_ID, #
148+
CAPTURE_DT_SCHEMA_ID
149+
]))
150+
151+
response = mdo.bulk_delete(deletes)
152+
assert response
153+
for dr in big_dataset.export_data_rows():
154+
assert len(dr.metadata["fields"]) == 1
155+
break
104156

105157

106-
def test_bulk_delete_datarow_enum_metadata(datarow: DataRow, dr_md_ontology):
158+
def test_bulk_delete_datarow_enum_metadata(datarow: DataRow, mdo):
107159
"""test bulk deletes for enum fields"""
108160
assert not len(datarow.metadata["fields"])
109161
metadata = make_metadata(datarow.uid)
110162
metadata.fields = [
111163
m for m in metadata.fields if m.schema_id == SPLIT_SCHEMA_ID
112164
]
113-
dr_md_ontology.bulk_upsert([metadata])
165+
mdo.bulk_upsert([metadata])
114166
assert len(datarow.metadata["fields"])
115167

116-
dr_md_ontology.bulk_delete([
168+
mdo.bulk_delete([
117169
DeleteDataRowMetadata(data_row_id=datarow.uid,
118-
fields=[TEST_SPLIT_ID, SPLIT_SCHEMA_ID])
170+
fields=[SPLIT_SCHEMA_ID])
119171
])
120-
assert not (len(datarow.metadata["fields"]))
172+
assert not len(datarow.metadata["fields"])
121173

122174

123-
def test_raise_enum_upsert_schema_error(datarow, dr_md_ontology):
175+
def test_raise_enum_upsert_schema_error(datarow, mdo):
124176
"""Setting an option id as the schema id will raise a Value Error"""
125177

126178
metadata = DataRowMetadata(data_row_id=datarow.uid,
@@ -129,10 +181,10 @@ def test_raise_enum_upsert_schema_error(datarow, dr_md_ontology):
129181
value=SPLIT_SCHEMA_ID),
130182
])
131183
with pytest.raises(ValueError):
132-
dr_md_ontology.bulk_upsert([metadata])
184+
mdo.bulk_upsert([metadata])
133185

134186

135-
def test_upsert_non_existent_schema_id(datarow, dr_md_ontology):
187+
def test_upsert_non_existent_schema_id(datarow, mdo):
136188
"""Raise error on non-existent schema id"""
137189
metadata = DataRowMetadata(data_row_id=datarow.uid,
138190
fields=[
@@ -141,10 +193,34 @@ def test_upsert_non_existent_schema_id(datarow, dr_md_ontology):
141193
value="message"),
142194
])
143195
with pytest.raises(ValueError):
144-
dr_md_ontology.bulk_upsert([metadata])
196+
mdo.bulk_upsert([metadata])
197+
198+
199+
def test_delete_non_existent_schema_id(datarow, mdo):
200+
assert not len(datarow.metadata["fields"])
201+
results = mdo.bulk_delete([
202+
DeleteDataRowMetadata(data_row_id=datarow.uid,
203+
fields=[EMBEDDING_SCHEMA_ID])
204+
])
205+
assert results
206+
207+
208+
@pytest.mark.slow
209+
def test_large_bulk_delete_non_existent_schema_id(big_dataset, mdo):
210+
deletes = []
211+
for dr in big_dataset.export_data_rows():
212+
deletes.append(
213+
DeleteDataRowMetadata(data_row_id=dr.uid,
214+
fields=[EMBEDDING_SCHEMA_ID]))
215+
response = mdo.bulk_delete(deletes)
216+
assert response
217+
218+
for dr in big_dataset.export_data_rows():
219+
assert not len(dr.metadata["fields"])
220+
break
145221

146222

147-
def test_parse_raw_metadata(dr_md_ontology):
223+
def test_parse_raw_metadata(mdo):
148224
example = {
149225
'data_row_id':
150226
'ckr6kkfx801ui0yrtg9fje8xh',
@@ -163,7 +239,7 @@ def test_parse_raw_metadata(dr_md_ontology):
163239
}]
164240
}
165241

166-
parsed = dr_md_ontology.parse_metadata([example])
242+
parsed = mdo.parse_metadata([example])
167243
assert len(parsed) == 1
168244
row = parsed[0]
169245
assert row.data_row_id == example["data_row_id"]

0 commit comments

Comments
 (0)