77from labelbox .schema .data_row_metadata import DataRowMetadataField , DataRowMetadata , DeleteDataRowMetadata , \
88 DataRowMetadataOntology
99
10+ INVALID_SCHEMA_ID = "1" * 25
1011FAKE_SCHEMA_ID = "0" * 25
1112FAKE_DATAROW_ID = "D" * 25
1213SPLIT_SCHEMA_ID = "cko8sbczn0002h2dkdaxb5kal"
1516EMBEDDING_SCHEMA_ID = "ckpyije740000yxdk81pbgjdc"
1617TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh"
1718CAPTURE_DT_SCHEMA_ID = "cko8sdzv70006h2dk8jg64zvb"
19+ PRE_COMPUTED_EMBEDDINGS_ID = 'ckrzang79000008l6hb5s6za1'
1820
1921FAKE_NUMBER_FIELD = {
2022 "id" : FAKE_SCHEMA_ID ,
@@ -40,24 +42,13 @@ def big_dataset(dataset: Dataset, image_url):
4042 "row_data" : image_url ,
4143 "external_id" : "my-image"
4244 },
43- ] * 250 )
45+ ] * 5 )
4446 task .wait_till_done ()
4547
4648 yield dataset
4749 dataset .delete ()
4850
4951
50- def wait_for_embeddings_svc (data_row_ids , mdo ):
51- for idx in range (5 ):
52- if all ([
53- len (metadata .fields )
54- for metadata in mdo .bulk_export (data_row_ids )
55- ]):
56- return
57- time .sleep ((idx + 1 )** 2 )
58- raise Exception ("Embedding svc failed to update metadata." )
59-
60-
6152def make_metadata (dr_id ) -> DataRowMetadata :
6253 embeddings = [0.0 ] * 128
6354 msg = "A message"
@@ -97,18 +88,20 @@ def test_get_datarow_metadata_ontology(mdo):
9788
9889
9990def test_bulk_upsert_datarow_metadata (datarow , mdo : DataRowMetadataOntology ):
100- wait_for_embeddings_svc ([datarow .uid ], mdo )
10191 metadata = make_metadata (datarow .uid )
10292 mdo .bulk_upsert ([metadata ])
103- assert len (mdo .bulk_export ([datarow .uid ]))
104- assert len (mdo .bulk_export ([datarow .uid ])[0 ].fields ) == 5
93+ exported = mdo .bulk_export ([datarow .uid ])
94+ assert len (exported )
95+ assert len ([
96+ field for field in exported [0 ].fields
97+ if field .schema_id != PRE_COMPUTED_EMBEDDINGS_ID
98+ ]) == 4
10599
106100
107101@pytest .mark .slow
108102def test_large_bulk_upsert_datarow_metadata (big_dataset , mdo ):
109103 metadata = []
110104 data_row_ids = [dr .uid for dr in big_dataset .data_rows ()]
111- wait_for_embeddings_svc (data_row_ids , mdo )
112105 for data_row_id in data_row_ids :
113106 metadata .append (make_metadata (data_row_id ))
114107 errors = mdo .bulk_upsert (metadata )
@@ -119,14 +112,16 @@ def test_large_bulk_upsert_datarow_metadata(big_dataset, mdo):
119112 for metadata in mdo .bulk_export (data_row_ids )
120113 }
121114 for data_row_id in data_row_ids :
122- assert len (metadata_lookup .get (data_row_id ).fields )
115+ assert len ([
116+ f for f in metadata_lookup .get (data_row_id ).fields
117+ if f .schema_id != PRE_COMPUTED_EMBEDDINGS_ID
118+ ]), metadata_lookup .get (data_row_id ).fields
123119
124120
125121def test_bulk_delete_datarow_metadata (datarow , mdo ):
126122 """test bulk deletes for all fields"""
127123 metadata = make_metadata (datarow .uid )
128124 mdo .bulk_upsert ([metadata ])
129-
130125 assert len (mdo .bulk_export ([datarow .uid ])[0 ].fields )
131126 upload_ids = [m .schema_id for m in metadata .fields [:- 2 ]]
132127 mdo .bulk_delete (
@@ -155,7 +150,6 @@ def test_bulk_partial_delete_datarow_metadata(datarow, mdo):
155150def test_large_bulk_delete_datarow_metadata (big_dataset , mdo ):
156151 metadata = []
157152 data_row_ids = [dr .uid for dr in big_dataset .data_rows ()]
158- wait_for_embeddings_svc (data_row_ids , mdo )
159153 for data_row_id in data_row_ids :
160154 metadata .append (
161155 DataRowMetadata (data_row_id = data_row_id ,
@@ -181,29 +175,33 @@ def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
181175 errors = mdo .bulk_delete (deletes )
182176 assert len (errors ) == 0
183177 for data_row_id in data_row_ids :
184- # 2 remaining because we delete the user provided embedding but text and labelbox generated embeddings still exist
185- fields = mdo .bulk_export ([data_row_id ])[0 ].fields
186- assert len (fields ) == 2
178+ fields = [
179+ f for f in mdo .bulk_export ([data_row_id ])[0 ].fields
180+ if f .schema_id != PRE_COMPUTED_EMBEDDINGS_ID
181+ ]
182+ assert len (fields ) == 1 , fields
187183 assert EMBEDDING_SCHEMA_ID not in [field .schema_id for field in fields ]
188184
189185
190186def test_bulk_delete_datarow_enum_metadata (datarow : DataRow , mdo ):
191187 """test bulk deletes for enum fields"""
192- wait_for_embeddings_svc ([datarow .uid ], mdo )
193188 metadata = make_metadata (datarow .uid )
194189 metadata .fields = [
195190 m for m in metadata .fields if m .schema_id == SPLIT_SCHEMA_ID
196191 ]
197192 mdo .bulk_upsert ([metadata ])
198193
199- assert len (mdo .bulk_export ([datarow .uid ])[0 ].fields ) == len (
194+ exported = mdo .bulk_export ([datarow .uid ])[0 ].fields
195+ assert len (exported ) == len (
200196 set ([x .schema_id for x in metadata .fields ] +
201- [x .schema_id for x in mdo . bulk_export ([ datarow . uid ])[ 0 ]. fields ]))
197+ [x .schema_id for x in exported ]))
202198
203199 mdo .bulk_delete ([
204200 DeleteDataRowMetadata (data_row_id = datarow .uid , fields = [SPLIT_SCHEMA_ID ])
205201 ])
206- assert len (mdo .bulk_export ([datarow .uid ])[0 ].fields ) == 1
202+ exported = mdo .bulk_export ([datarow .uid ])[0 ].fields
203+ assert len (
204+ [f for f in exported if f .schema_id != PRE_COMPUTED_EMBEDDINGS_ID ]) == 0
207205
208206
209207def test_raise_enum_upsert_schema_error (datarow , mdo ):
@@ -223,7 +221,7 @@ def test_upsert_non_existent_schema_id(datarow, mdo):
223221 metadata = DataRowMetadata (data_row_id = datarow .uid ,
224222 fields = [
225223 DataRowMetadataField (
226- schema_id = FAKE_SCHEMA_ID ,
224+ schema_id = INVALID_SCHEMA_ID ,
227225 value = "message" ),
228226 ])
229227 with pytest .raises (ValueError ):
0 commit comments