@@ -1,4 +1,6 @@
 from datetime import datetime
+import time
+from attr import field
 
 import pytest
 
@@ -34,6 +36,17 @@ def big_dataset(dataset: Dataset, image_url):
     dataset.delete()
 
 
+def wait_for_embeddings_svc(data_row_ids, mdo):
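+    # Poll bulk_export until every requested data row reports at least one
+    # metadata field, backing off quadratically (1 s, 4 s, 9 s, ...) for up
+    # to five attempts.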
+    for idx in range(5):
+        if all([
+                len(metadata.fields)
+                for metadata in mdo.bulk_export(data_row_ids)
+        ]):
+            return
+        time.sleep((idx + 1)**2)
+    raise Exception("Embedding svc failed to update metadata.")
+
+
 def make_metadata(dr_id) -> DataRowMetadata:
     embeddings = [0.0] * 128
     msg = "A message"
@@ -59,25 +72,21 @@ def test_get_datarow_metadata_ontology(mdo):
 
 
 def test_bulk_upsert_datarow_metadata(datarow, mdo: DataRowMetadataOntology):
+    wait_for_embeddings_svc([datarow.uid], mdo)
     metadata = make_metadata(datarow.uid)
     mdo.bulk_upsert([metadata])
     assert len(mdo.bulk_export([datarow.uid]))
-    assert len(mdo.bulk_export([datarow.uid])[0].fields)
-
-
-def test_parse_upsert_datarow_metadata(datarow, mdo: DataRowMetadataOntology):
-    metadata = make_metadata(datarow.uid)
-    mdo.bulk_upsert([metadata])
-    assert mdo.bulk_export([datarow.uid])
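+    # make_metadata upserts four fields; the fifth is presumably the field
+    # populated by the embeddings service while we waited.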
+    assert len(mdo.bulk_export([datarow.uid])[0].fields) == 5
 
 
 @pytest.mark.slow
 def test_large_bulk_upsert_datarow_metadata(big_dataset, mdo):
     metadata = []
     data_row_ids = []
-    for dr in big_dataset.data_rows():
-        metadata.append(make_metadata(dr.uid))
-        data_row_ids.append(dr.uid)
+    data_row_ids = [dr.uid for dr in big_dataset.data_rows()]
+    wait_for_embeddings_svc(data_row_ids, mdo)
+    for data_row_id in data_row_ids:
+        metadata.append(make_metadata(data_row_id))
     errors = mdo.bulk_upsert(metadata)
     assert len(errors) == 0
 
@@ -120,10 +129,13 @@ def test_bulk_partial_delete_datarow_metadata(datarow, mdo):
 
 
 def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
+
     metadata = []
-    for dr in big_dataset.data_rows():
+    data_row_ids = [dr.uid for dr in big_dataset.data_rows()]
+    wait_for_embeddings_svc(data_row_ids, mdo)
+    for data_row_id in data_row_ids:
         metadata.append(
-            DataRowMetadata(data_row_id=dr.uid,
+            DataRowMetadata(data_row_id=data_row_id,
                             fields=[
                                 DataRowMetadataField(
                                     schema_id=EMBEDDING_SCHEMA_ID,
@@ -135,25 +147,26 @@ def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
     assert len(errors) == 0
 
     deletes = []
-    for dr in big_dataset.data_rows():
+    for data_row_id in data_row_ids:
         deletes.append(
             DeleteDataRowMetadata(
-                data_row_id=dr.uid,
+                data_row_id=data_row_id,
                 fields=[
                     EMBEDDING_SCHEMA_ID,  #
                     CAPTURE_DT_SCHEMA_ID
                 ]))
-
     errors = mdo.bulk_delete(deletes)
     assert len(errors) == 0
-    for dr in big_dataset.data_rows():
-        # 1 remaining because only the embeddings id overlaps
-        assert len(mdo.bulk_export([dr.uid])[0].fields) == 1
+    for data_row_id in data_row_ids:
+        # 2 remaining: the user-provided embedding is deleted, but the text
+        # and Labelbox-generated embedding fields still exist
+        fields = mdo.bulk_export([data_row_id])[0].fields
+        assert len(fields) == 2
+        assert EMBEDDING_SCHEMA_ID not in [field.schema_id for field in fields]
 
 
 def test_bulk_delete_datarow_enum_metadata(datarow: DataRow, mdo):
     """test bulk deletes for enum fields"""
-    n_fields = len(mdo.bulk_export([datarow.uid])[0].fields)
+    wait_for_embeddings_svc([datarow.uid], mdo)
     metadata = make_metadata(datarow.uid)
     metadata.fields = [
         m for m in metadata.fields if m.schema_id == SPLIT_SCHEMA_ID
@@ -167,7 +180,7 @@ def test_bulk_delete_datarow_enum_metadata(datarow: DataRow, mdo):
     mdo.bulk_delete([
         DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[SPLIT_SCHEMA_ID])
     ])
-    assert len(mdo.bulk_export([datarow.uid])[0].fields) == n_fields
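+    # The one remaining field is presumably the one populated by the
+    # embeddings service.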
+    assert len(mdo.bulk_export([datarow.uid])[0].fields) == 1
 
 
 def test_raise_enum_upsert_schema_error(datarow, mdo):
@@ -202,24 +215,6 @@ def test_delete_non_existent_schema_id(datarow, mdo):
     # No message is returned
 
 
-@pytest.mark.slow
-def test_large_bulk_delete_non_existent_schema_id(big_dataset, mdo):
-    deletes = []
-    n_fields_start = 0
-    for idx, dr in enumerate(big_dataset.data_rows()):
-        if idx == 0:
-            n_fields_start = len(mdo.bulk_export([dr.uid])[0].fields)
-        deletes.append(
-            DeleteDataRowMetadata(data_row_id=dr.uid,
-                                  fields=[EMBEDDING_SCHEMA_ID]))
-    errors = mdo.bulk_delete(deletes)
-    assert len(errors) == 0
-
-    for dr in big_dataset.export_data_rows():
-        assert len(mdo.bulk_export([dr.uid])[0].fields) == n_fields_start
-        break
-
-
 def test_parse_raw_metadata(mdo):
     example = {
         'dataRowId':