Skip to content

Commit a939c4d

Browse files
author
Kevin Kim
committed
Fix failing metadata related tests due to embeddings sunset
1 parent 4609344 commit a939c4d

File tree

3 files changed

+30
-66
lines changed

3 files changed

+30
-66
lines changed

tests/integration/test_data_row_metadata.py

Lines changed: 15 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -14,10 +14,8 @@
1414
SPLIT_SCHEMA_ID = "cko8sbczn0002h2dkdaxb5kal"
1515
TRAIN_SPLIT_ID = "cko8sbscr0003h2dk04w86hof"
1616
TEST_SPLIT_ID = "cko8scbz70005h2dkastwhgqt"
17-
EMBEDDING_SCHEMA_ID = "ckpyije740000yxdk81pbgjdc"
1817
TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh"
1918
CAPTURE_DT_SCHEMA_ID = "cko8sdzv70006h2dk8jg64zvb"
20-
PRE_COMPUTED_EMBEDDINGS_ID = 'ckrzang79000008l6hb5s6za1'
2119
CUSTOM_TEXT_SCHEMA_NAME = 'custom_text'
2220

2321
FAKE_NUMBER_FIELD = {
@@ -56,7 +54,6 @@ def big_dataset(dataset: Dataset, image_url):
5654

5755

5856
def make_metadata(dr_id) -> DataRowMetadata:
59-
embeddings = [0.0] * 128
6057
msg = "A message"
6158
time = datetime.utcnow()
6259

@@ -67,14 +64,11 @@ def make_metadata(dr_id) -> DataRowMetadata:
6764
value=TEST_SPLIT_ID),
6865
DataRowMetadataField(schema_id=CAPTURE_DT_SCHEMA_ID, value=time),
6966
DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value=msg),
70-
DataRowMetadataField(schema_id=EMBEDDING_SCHEMA_ID,
71-
value=embeddings),
7267
])
7368
return metadata
7469

7570

7671
def make_named_metadata(dr_id) -> DataRowMetadata:
77-
embeddings = [0.0] * 128
7872
msg = "A message"
7973
time = datetime.utcnow()
8074

@@ -86,8 +80,6 @@ def make_named_metadata(dr_id) -> DataRowMetadata:
8680
value=time),
8781
DataRowMetadataField(
8882
name=CUSTOM_TEXT_SCHEMA_NAME, value=msg),
89-
DataRowMetadataField(name='embedding',
90-
value=embeddings),
9183
])
9284
return metadata
9385

@@ -127,10 +119,7 @@ def test_bulk_upsert_datarow_metadata(datarow, mdo: DataRowMetadataOntology):
127119
mdo.bulk_upsert([metadata])
128120
exported = mdo.bulk_export([datarow.uid])
129121
assert len(exported)
130-
assert len([
131-
field for field in exported[0].fields
132-
if field.schema_id != PRE_COMPUTED_EMBEDDINGS_ID
133-
]) == 4
122+
assert len([field for field in exported[0].fields]) == 3
134123

135124

136125
@pytest.mark.slow
@@ -147,10 +136,8 @@ def test_large_bulk_upsert_datarow_metadata(big_dataset, mdo):
147136
for metadata in mdo.bulk_export(data_row_ids)
148137
}
149138
for data_row_id in data_row_ids:
150-
assert len([
151-
f for f in metadata_lookup.get(data_row_id).fields
152-
if f.schema_id != PRE_COMPUTED_EMBEDDINGS_ID
153-
]), metadata_lookup.get(data_row_id).fields
139+
assert len([f for f in metadata_lookup.get(data_row_id).fields
140+
]), metadata_lookup.get(data_row_id).fields
154141

155142

156143
def test_upsert_datarow_metadata_by_name(datarow, mdo):
@@ -162,10 +149,8 @@ def test_upsert_datarow_metadata_by_name(datarow, mdo):
162149
metadata.data_row_id: metadata
163150
for metadata in mdo.bulk_export([datarow.uid])
164151
}
165-
assert len([
166-
f for f in metadata_lookup.get(datarow.uid).fields
167-
if f.schema_id != PRE_COMPUTED_EMBEDDINGS_ID
168-
]), metadata_lookup.get(datarow.uid).fields
152+
assert len([f for f in metadata_lookup.get(datarow.uid).fields
153+
]), metadata_lookup.get(datarow.uid).fields
169154

170155

171156
def test_upsert_datarow_metadata_option_by_name(datarow, mdo):
@@ -220,10 +205,7 @@ def test_bulk_partial_delete_datarow_metadata(datarow, mdo):
220205
mdo.bulk_delete([
221206
DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[TEXT_SCHEMA_ID])
222207
])
223-
fields = [
224-
f for f in mdo.bulk_export([datarow.uid])[0].fields
225-
if f.schema_id != PRE_COMPUTED_EMBEDDINGS_ID
226-
]
208+
fields = [f for f in mdo.bulk_export([datarow.uid])[0].fields]
227209
assert len(fields) == (len(metadata.fields) - 1)
228210

229211

@@ -234,9 +216,8 @@ def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
234216
metadata.append(
235217
DataRowMetadata(data_row_id=data_row_id,
236218
fields=[
237-
DataRowMetadataField(
238-
schema_id=EMBEDDING_SCHEMA_ID,
239-
value=[0.1] * 128),
219+
DataRowMetadataField(schema_id=SPLIT_SCHEMA_ID,
220+
value=TEST_SPLIT_ID),
240221
DataRowMetadataField(schema_id=TEXT_SCHEMA_ID,
241222
value="test-message")
242223
]))
@@ -248,19 +229,13 @@ def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
248229
deletes.append(
249230
DeleteDataRowMetadata(
250231
data_row_id=data_row_id,
251-
fields=[
252-
EMBEDDING_SCHEMA_ID, #
253-
CAPTURE_DT_SCHEMA_ID
254-
]))
232+
fields=[SPLIT_SCHEMA_ID, CAPTURE_DT_SCHEMA_ID]))
255233
errors = mdo.bulk_delete(deletes)
256234
assert len(errors) == 0
257235
for data_row_id in data_row_ids:
258-
fields = [
259-
f for f in mdo.bulk_export([data_row_id])[0].fields
260-
if f.schema_id != PRE_COMPUTED_EMBEDDINGS_ID
261-
]
236+
fields = [f for f in mdo.bulk_export([data_row_id])[0].fields]
262237
assert len(fields) == 1, fields
263-
assert EMBEDDING_SCHEMA_ID not in [field.schema_id for field in fields]
238+
assert SPLIT_SCHEMA_ID not in [field.schema_id for field in fields]
264239

265240

266241
def test_bulk_delete_datarow_enum_metadata(datarow: DataRow, mdo):
@@ -280,8 +255,7 @@ def test_bulk_delete_datarow_enum_metadata(datarow: DataRow, mdo):
280255
DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[SPLIT_SCHEMA_ID])
281256
])
282257
exported = mdo.bulk_export([datarow.uid])[0].fields
283-
assert len(
284-
[f for f in exported if f.schema_id != PRE_COMPUTED_EMBEDDINGS_ID]) == 0
258+
assert len(exported) == 0
285259

286260

287261
def test_raise_enum_upsert_schema_error(datarow, mdo):
@@ -309,11 +283,10 @@ def test_upsert_non_existent_schema_id(datarow, mdo):
309283

310284

311285
def test_delete_non_existent_schema_id(datarow, mdo):
312-
mdo.bulk_delete([
313-
DeleteDataRowMetadata(data_row_id=datarow.uid,
314-
fields=[EMBEDDING_SCHEMA_ID])
286+
res = mdo.bulk_delete([
287+
DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[SPLIT_SCHEMA_ID])
315288
])
316-
# No message is returned
289+
assert len(res) == 0
317290

318291

319292
def test_parse_raw_metadata(mdo):

tests/integration/test_data_rows.py

Lines changed: 10 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -14,12 +14,10 @@
1414

1515
SPLIT_SCHEMA_ID = "cko8sbczn0002h2dkdaxb5kal"
1616
TEST_SPLIT_ID = "cko8scbz70005h2dkastwhgqt"
17-
EMBEDDING_SCHEMA_ID = "ckpyije740000yxdk81pbgjdc"
1817
TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh"
1918
CAPTURE_DT_SCHEMA_ID = "cko8sdzv70006h2dk8jg64zvb"
2019
EXPECTED_METADATA_SCHEMA_IDS = [
21-
SPLIT_SCHEMA_ID, TEST_SPLIT_ID, EMBEDDING_SCHEMA_ID, TEXT_SCHEMA_ID,
22-
CAPTURE_DT_SCHEMA_ID
20+
SPLIT_SCHEMA_ID, TEST_SPLIT_ID, TEXT_SCHEMA_ID, CAPTURE_DT_SCHEMA_ID
2321
].sort()
2422
CUSTOM_TEXT_SCHEMA_NAME = "custom_text"
2523

@@ -88,21 +86,18 @@ def tile_content():
8886

8987

9088
def make_metadata_fields():
91-
embeddings = [0.0] * 128
9289
msg = "A message"
9390
time = datetime.utcnow()
9491

9592
fields = [
9693
DataRowMetadataField(schema_id=SPLIT_SCHEMA_ID, value=TEST_SPLIT_ID),
9794
DataRowMetadataField(schema_id=CAPTURE_DT_SCHEMA_ID, value=time),
9895
DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value=msg),
99-
DataRowMetadataField(schema_id=EMBEDDING_SCHEMA_ID, value=embeddings),
10096
]
10197
return fields
10298

10399

104100
def make_metadata_fields_dict():
105-
embeddings = [0.0] * 128
106101
msg = "A message"
107102
time = datetime.utcnow()
108103

@@ -115,9 +110,6 @@ def make_metadata_fields_dict():
115110
}, {
116111
"schema_id": TEXT_SCHEMA_ID,
117112
"value": msg
118-
}, {
119-
"schema_id": EMBEDDING_SCHEMA_ID,
120-
"value": embeddings
121113
}]
122114
return fields
123115

@@ -312,8 +304,8 @@ def test_create_data_row_with_metadata(mdo, dataset, image_url):
312304
assert data_row.media_attributes is not None
313305
metadata_fields = data_row.metadata_fields
314306
metadata = data_row.metadata
315-
assert len(metadata_fields) == 4
316-
assert len(metadata) == 4
307+
assert len(metadata_fields) == 3
308+
assert len(metadata) == 3
317309
assert [m["schemaId"] for m in metadata_fields
318310
].sort() == EXPECTED_METADATA_SCHEMA_IDS
319311
for m in metadata:
@@ -336,8 +328,8 @@ def test_create_data_row_with_metadata_dict(mdo, dataset, image_url):
336328
assert data_row.media_attributes is not None
337329
metadata_fields = data_row.metadata_fields
338330
metadata = data_row.metadata
339-
assert len(metadata_fields) == 4
340-
assert len(metadata) == 4
331+
assert len(metadata_fields) == 3
332+
assert len(metadata) == 3
341333
assert [m["schemaId"] for m in metadata_fields
342334
].sort() == EXPECTED_METADATA_SCHEMA_IDS
343335
for m in metadata:
@@ -347,7 +339,7 @@ def test_create_data_row_with_metadata_dict(mdo, dataset, image_url):
347339
def test_create_data_row_with_invalid_metadata(dataset, image_url):
348340
fields = make_metadata_fields()
349341
fields.append(
350-
DataRowMetadataField(schema_id=EMBEDDING_SCHEMA_ID, value=[0.0] * 128))
342+
DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value='some msg'))
351343

352344
with pytest.raises(labelbox.exceptions.MalformedQueryException):
353345
dataset.create_data_row(row_data=image_url, metadata_fields=fields)
@@ -393,8 +385,8 @@ def test_create_data_rows_with_metadata(mdo, dataset, image_url):
393385

394386
metadata_fields = row.metadata_fields
395387
metadata = row.metadata
396-
assert len(metadata_fields) == 4
397-
assert len(metadata) == 4
388+
assert len(metadata_fields) == 3
389+
assert len(metadata) == 3
398390
assert [m["schemaId"] for m in metadata_fields
399391
].sort() == EXPECTED_METADATA_SCHEMA_IDS
400392
for m in metadata:
@@ -479,7 +471,7 @@ def create_data_row(data_rows):
479471
def test_create_data_rows_with_invalid_metadata(dataset, image_url):
480472
fields = make_metadata_fields()
481473
fields.append(
482-
DataRowMetadataField(schema_id=EMBEDDING_SCHEMA_ID, value=[0.0] * 128))
474+
DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value='some msg'))
483475

484476
task = dataset.create_data_rows([{
485477
DataRow.row_data: image_url,
@@ -730,7 +722,7 @@ def test_create_data_rows_local_file(dataset, sample_image):
730722
assert task.status == "COMPLETE"
731723
data_row = list(dataset.data_rows())[0]
732724
assert data_row.external_id == "tests/integration/media/sample_image.jpg"
733-
assert len(data_row.metadata_fields) == 4
725+
assert len(data_row.metadata_fields) == 3
734726

735727

736728
def test_data_row_with_global_key(dataset, sample_image):

tests/integration/test_task.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3,21 +3,20 @@
33
from labelbox import DataRow
44
from labelbox.schema.data_row_metadata import DataRowMetadataField
55

6-
EMBEDDING_SCHEMA_ID = "ckpyije740000yxdk81pbgjdc"
6+
TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh"
77

88

99
def test_task_errors(dataset, image_url):
1010
client = dataset.client
11-
embeddings = [0.0] * 128
1211
task = dataset.create_data_rows([
1312
{
1413
DataRow.row_data:
1514
image_url,
1615
DataRow.metadata_fields: [
17-
DataRowMetadataField(schema_id=EMBEDDING_SCHEMA_ID,
18-
value=embeddings),
19-
DataRowMetadataField(schema_id=EMBEDDING_SCHEMA_ID,
20-
value=embeddings)
16+
DataRowMetadataField(schema_id=TEXT_SCHEMA_ID,
17+
value='some msg'),
18+
DataRowMetadataField(schema_id=TEXT_SCHEMA_ID,
19+
value='some msg 2')
2120
]
2221
},
2322
])

0 commit comments

Comments
 (0)