1616
1717
@pytest.fixture
def mdo(client):
    """Yield the client's DataRowMetadataOntology for the tests below."""
    yield client.get_data_row_metadata_ontology()
2222
@@ -52,8 +52,7 @@ def make_metadata(dr_id) -> DataRowMetadata:
5252 return metadata
5353
5454
def test_get_datarow_metadata_ontology(mdo):
    """The ontology exposes reserved fields but no custom fields yet."""
    assert len(mdo.all_fields)
    assert len(mdo.reserved_fields)
    assert len(mdo.custom_fields) == 0
@@ -66,61 +65,114 @@ def test_get_datarow_metadata(datarow):
6665 assert len (md )
6766
6867
def test_bulk_upsert_datarow_metadata(datarow, mdo):
    """Upserting metadata populates the data row's previously empty fields."""
    assert not len(datarow.metadata["fields"])
    metadata = make_metadata(datarow.uid)
    mdo.bulk_upsert([metadata])
    assert len(datarow.metadata["fields"])
7473
7574
@pytest.mark.slow
def test_large_bulk_upsert_datarow_metadata(big_dataset, mdo):
    """Bulk-upsert metadata for every row of a large dataset."""
    metadata = []
    for dr in big_dataset.export_data_rows():
        metadata.append(make_metadata(dr.uid))
    response = mdo.bulk_upsert(metadata)
    assert response

    # Spot-check only the first exported row to keep the test fast.
    for dr in big_dataset.export_data_rows():
        assert len(dr.metadata["fields"])
        break
8786
8887
def test_bulk_delete_datarow_metadata(datarow, mdo):
    """Bulk-delete every field that was upserted.

    TODO: this fails because of the enum validation issue

    """
    assert not len(datarow.metadata["fields"])

    metadata = make_metadata(datarow.uid)
    mdo.bulk_upsert([metadata])

    assert len(datarow.metadata["fields"])

    # Delete every schema id we just upserted.
    mdo.bulk_delete([
        DeleteDataRowMetadata(data_row_id=datarow.uid,
                              fields=[m.schema_id for m in metadata.fields])
    ])

    assert not len(datarow.metadata["fields"])
107+
108+
def test_bulk_partial_delete_datarow_metadata(datarow, mdo):
    """Delete a single field from metadata; the others remain."""
    assert not len(datarow.metadata["fields"])

    metadata = make_metadata(datarow.uid)
    mdo.bulk_upsert([metadata])

    assert len(datarow.metadata["fields"])

    mdo.bulk_delete([
        DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[TEXT_SCHEMA_ID])
    ])

    # Only the text field was removed; four fields remain.
    assert len(datarow.metadata["fields"]) == 4
123+
124+
@pytest.mark.slow
def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
    """Upsert two fields per row, then bulk-delete one of them dataset-wide."""
    metadata = []
    for dr in big_dataset.export_data_rows():
        metadata.append(
            DataRowMetadata(data_row_id=dr.uid,
                            fields=[
                                DataRowMetadataField(
                                    schema_id=EMBEDDING_SCHEMA_ID,
                                    value=[0.1] * 128),
                                DataRowMetadataField(schema_id=TEXT_SCHEMA_ID,
                                                     value="test-message")
                            ]))
    response = mdo.bulk_upsert(metadata)
    assert response

    # Request deletion of the embedding plus a field that was never upserted.
    deletes = []
    for dr in big_dataset.export_data_rows():
        deletes.append(
            DeleteDataRowMetadata(data_row_id=dr.uid,
                                  fields=[
                                      EMBEDDING_SCHEMA_ID,
                                      CAPTURE_DT_SCHEMA_ID
                                  ]))

    response = mdo.bulk_delete(deletes)
    assert response
    # Spot-check the first row: only the text field should survive.
    for dr in big_dataset.export_data_rows():
        assert len(dr.metadata["fields"]) == 1
        break
104156
105157
def test_bulk_delete_datarow_enum_metadata(datarow: DataRow, mdo):
    """Bulk-delete an enum (split) field by its schema id."""
    assert not len(datarow.metadata["fields"])
    metadata = make_metadata(datarow.uid)
    # Keep only the split (enum) field for this test.
    metadata.fields = [
        m for m in metadata.fields if m.schema_id == SPLIT_SCHEMA_ID
    ]
    mdo.bulk_upsert([metadata])
    assert len(datarow.metadata["fields"])

    mdo.bulk_delete([
        DeleteDataRowMetadata(data_row_id=datarow.uid,
                              fields=[SPLIT_SCHEMA_ID])
    ])
    assert not len(datarow.metadata["fields"])
121173
122174
123- def test_raise_enum_upsert_schema_error (datarow , dr_md_ontology ):
175+ def test_raise_enum_upsert_schema_error (datarow , mdo ):
124176 """Setting an option id as the schema id will raise a Value Error"""
125177
126178 metadata = DataRowMetadata (data_row_id = datarow .uid ,
@@ -129,10 +181,10 @@ def test_raise_enum_upsert_schema_error(datarow, dr_md_ontology):
129181 value = SPLIT_SCHEMA_ID ),
130182 ])
131183 with pytest .raises (ValueError ):
132- dr_md_ontology .bulk_upsert ([metadata ])
184+ mdo .bulk_upsert ([metadata ])
133185
134186
135- def test_upsert_non_existent_schema_id (datarow , dr_md_ontology ):
187+ def test_upsert_non_existent_schema_id (datarow , mdo ):
136188 """Raise error on non-existent schema id"""
137189 metadata = DataRowMetadata (data_row_id = datarow .uid ,
138190 fields = [
@@ -141,10 +193,34 @@ def test_upsert_non_existent_schema_id(datarow, dr_md_ontology):
141193 value = "message" ),
142194 ])
143195 with pytest .raises (ValueError ):
144- dr_md_ontology .bulk_upsert ([metadata ])
196+ mdo .bulk_upsert ([metadata ])
197+
198+
def test_delete_non_existent_schema_id(datarow, mdo):
    """Deleting a field the row never had still returns a truthy result."""
    assert not len(datarow.metadata["fields"])
    results = mdo.bulk_delete([
        DeleteDataRowMetadata(data_row_id=datarow.uid,
                              fields=[EMBEDDING_SCHEMA_ID])
    ])
    assert results
206+
207+
@pytest.mark.slow
def test_large_bulk_delete_non_existent_schema_id(big_dataset, mdo):
    """Bulk-deleting a never-upserted field across a large dataset succeeds."""
    deletes = []
    for dr in big_dataset.export_data_rows():
        deletes.append(
            DeleteDataRowMetadata(data_row_id=dr.uid,
                                  fields=[EMBEDDING_SCHEMA_ID]))
    response = mdo.bulk_delete(deletes)
    assert response

    # Spot-check only the first exported row to keep the test fast.
    for dr in big_dataset.export_data_rows():
        assert not len(dr.metadata["fields"])
        break
145221
146222
147- def test_parse_raw_metadata (dr_md_ontology ):
223+ def test_parse_raw_metadata (mdo ):
148224 example = {
149225 'data_row_id' :
150226 'ckr6kkfx801ui0yrtg9fje8xh' ,
@@ -163,7 +239,7 @@ def test_parse_raw_metadata(dr_md_ontology):
163239 }]
164240 }
165241
166- parsed = dr_md_ontology .parse_metadata ([example ])
242+ parsed = mdo .parse_metadata ([example ])
167243 assert len (parsed ) == 1
168244 row = parsed [0 ]
169245 assert row .data_row_id == example ["data_row_id" ]
0 commit comments