11# type: ignore
22import datetime
3+ import warnings
34from enum import Enum
45from itertools import chain
56from typing import List , Optional , Dict , Union , Callable , Type , Any , Generator
@@ -21,13 +22,18 @@ class DataRowMetadataKind(Enum):
2122
2223# Metadata schema
class DataRowMetadataSchema(BaseModel):
    """One entry of the data row metadata ontology.

    An entry is either a top-level field or, when it is listed in another
    entry's ``options``, one of that field's options.
    """
    # Unique identifier of the schema; replaces the former `id` field.
    uid: SchemaId
    # Whitespace-stripped, 1 to 100 characters.
    name: constr(strip_whitespace=True, min_length=1, max_length=100)
    reserved: bool
    kind: DataRowMetadataKind
    # Nested option schemas, if this field has any.
    options: Optional[List["DataRowMetadataSchema"]]
    # Identifier of the owning field when this entry is an option.
    parent: Optional[SchemaId]

    @property
    def id(self):
        """Deprecated alias for ``uid``.

        Emits a ``DeprecationWarning`` on every access and returns ``uid``.
        """
        # stacklevel=2 attributes the warning to the code that accessed
        # `.id`, so users see their own call site instead of this module.
        warnings.warn("`id` is being deprecated in favor of `uid`",
                      DeprecationWarning,
                      stacklevel=2)
        return self.uid
36+
3137
3238DataRowMetadataSchema .update_forward_refs ()
3339
@@ -36,7 +42,7 @@ class DataRowMetadataSchema(BaseModel):
3642DateTime : Type [datetime .datetime ] = datetime .datetime # must be in UTC
3743String : Type [str ] = constr (max_length = 500 )
3844OptionId : Type [SchemaId ] = SchemaId # enum option
39- Number : Type [float ]
45+ Number : Type [float ] = float
4046
4147DataRowMetadataValue = Union [Embedding , DateTime , String , OptionId , Number ]
4248
@@ -107,28 +113,31 @@ class DataRowMetadataOntology:
107113 """
108114
109115 def __init__ (self , client ):
110- self .client = client
111- self ._batch_size = 50
112116
113- # TODO: consider making these properties to stay in sync with server
117+ self ._client = client
118+ self ._batch_size = 50 # used for uploads and deletes
119+
114120 self ._raw_ontology = self ._get_ontology ()
121+
115122 # all fields
116- self .all_fields = self ._parse_ontology ()
117- self .all_fields_id_index = self ._make_id_index (self .all_fields )
123+ self .fields = self ._parse_ontology ()
124+ self .fields_by_id = self ._make_id_index (self .fields )
125+
118126 # reserved fields
119127 self .reserved_fields : List [DataRowMetadataSchema ] = [
120- f for f in self .all_fields if f .reserved
128+ f for f in self .fields if f .reserved
121129 ]
122- self .reserved_id_index = self ._make_id_index (self .reserved_fields )
123- self .reserved_name_index : Dict [str , DataRowMetadataSchema ] = {
130+ self .reserved_by_id = self ._make_id_index (self .reserved_fields )
131+ self .reserved_by_name : Dict [str , DataRowMetadataSchema ] = {
124132 f .name : f for f in self .reserved_fields
125133 }
134+
126135 # custom fields
127136 self .custom_fields : List [DataRowMetadataSchema ] = [
128- f for f in self .all_fields if not f .reserved
137+ f for f in self .fields if not f .reserved
129138 ]
130- self .custom_id_index = self ._make_id_index (self .custom_fields )
131- self .custom_name_index : Dict [str , DataRowMetadataSchema ] = {
139+ self .custom_by_id = self ._make_id_index (self .custom_fields )
140+ self .custom_by_name : Dict [str , DataRowMetadataSchema ] = {
132141 f .name : f for f in self .custom_fields
133142 }
134143
@@ -150,13 +159,13 @@ def _make_id_index(
150159 ) -> Dict [SchemaId , DataRowMetadataSchema ]:
151160 index = {}
152161 for f in fields :
153- index [f .id ] = f
162+ index [f .uid ] = f
154163 if f .options :
155164 for o in f .options :
156- index [o .id ] = o
165+ index [o .uid ] = o
157166 return index
158167
159- def _get_ontology (self ) -> Dict [str , Any ]:
168+ def _get_ontology (self ) -> List [ Dict [str , Any ] ]:
160169 query = """query GetMetadataOntologyBetaPyApi {
161170 customMetadataOntology {
162171 id
@@ -171,21 +180,26 @@ def _get_ontology(self) -> Dict[str, Any]:
171180 }
172181 }}
173182 """
174- return self .client .execute (query )["customMetadataOntology" ]
183+ return self ._client .execute (query )["customMetadataOntology" ]
175184
def _parse_ontology(self) -> List[DataRowMetadataSchema]:
    """Convert the raw ontology payload into ``DataRowMetadataSchema`` objects.

    Each entry of ``self._raw_ontology``, and each entry of its ``options``
    list when present, has its ``id`` key renamed to ``uid`` to match the
    schema model. The raw dicts are modified in place: the key is renamed
    and ``options`` is replaced by the parsed option objects (or ``None``).
    Every option is created with ``parent`` set to the uid of the field
    that owns it.
    """
    fields = []
    for schema in self._raw_ontology:
        # The payload calls the identifier `id`; the model calls it `uid`.
        schema["uid"] = schema.pop("id")
        options = None
        if schema.get("options"):
            options = []
            for option in schema["options"]:
                option["uid"] = option.pop("id")
                # `id` was popped from `schema` above, so the owning
                # field's identifier must be read from `uid`; reading
                # `schema["id"]` here raises KeyError.
                options.append(
                    DataRowMetadataSchema(**{
                        **option,
                        "parent": schema["uid"],
                    }))
        schema["options"] = options
        fields.append(DataRowMetadataSchema(**schema))
191205
@@ -197,7 +211,7 @@ def parse_metadata(
197211 Dict ]]]]) -> List [DataRowMetadata ]:
198212 """ Parse metadata responses
199213
200- >>> mdo.parse_metadata([datarow.metadata ])
214+ >>> mdo.parse_metadata([metadata ])
201215
202216 Args:
203217 unparsed: An unparsed metadata export
@@ -213,14 +227,14 @@ def parse_metadata(
213227 for dr in unparsed :
214228 fields = []
215229 for f in dr ["fields" ]:
216- schema = self .all_fields_id_index [f ["schemaId" ]]
230+ schema = self .fields_by_id [f ["schemaId" ]]
217231 if schema .kind == DataRowMetadataKind .enum :
218232 continue
219233 elif schema .kind == DataRowMetadataKind .option :
220234 field = DataRowMetadataField (schema_id = schema .parent ,
221- value = schema .id )
235+ value = schema .uid )
222236 else :
223- field = DataRowMetadataField (schema_id = schema .id ,
237+ field = DataRowMetadataField (schema_id = schema .uid ,
224238 value = f ["value" ])
225239
226240 fields .append (field )
@@ -267,7 +281,7 @@ def _batch_upsert(
267281 }
268282 }
269283 }"""
270- res = self .client .execute (
284+ res = self ._client .execute (
271285 query , {"metadata" : upserts })['upsertDataRowCustomMetadata' ]
272286 return [
273287 DataRowMetadataBatchResponse (data_row_id = r ['dataRowId' ],
@@ -330,7 +344,7 @@ def _batch_delete(
330344 }
331345 }
332346 """
333- res = self .client .execute (
347+ res = self ._client .execute (
334348 query , {"deletes" : deletes })['deleteDataRowCustomMetadata' ]
335349 failures = []
336350 for dr in res :
@@ -373,7 +387,7 @@ def _bulk_export(_data_row_ids: List[str]) -> List[DataRowMetadata]:
373387 }
374388 """
375389 return self .parse_metadata (
376- self .client .execute (
390+ self ._client .execute (
377391 query ,
378392 {"dataRowIds" : _data_row_ids })['dataRowCustomMetadata' ])
379393
@@ -386,11 +400,11 @@ def _parse_upsert(
386400 ) -> List [_UpsertDataRowMetadataInput ]:
387401 """Format for metadata upserts to GQL"""
388402
389- if metadatum .schema_id not in self .all_fields_id_index :
403+ if metadatum .schema_id not in self .fields_by_id :
390404 raise ValueError (
391405 f"Schema Id `{ metadatum .schema_id } ` not found in ontology" )
392406
393- schema = self .all_fields_id_index [metadatum .schema_id ]
407+ schema = self .fields_by_id [metadatum .schema_id ]
394408
395409 if schema .kind == DataRowMetadataKind .datetime :
396410 parsed = _validate_parse_datetime (metadatum )
@@ -413,16 +427,16 @@ def _validate_delete(self, delete: DeleteDataRowMetadata):
413427
414428 deletes = set ()
415429 for schema_id in delete .fields :
416- if schema_id not in self .all_fields_id_index :
430+ if schema_id not in self .fields_by_id :
417431 raise ValueError (
418432 f"Schema Id `{ schema_id } ` not found in ontology" )
419433
420- schema = self .all_fields_id_index [schema_id ]
434+ schema = self .fields_by_id [schema_id ]
421435 # handle users specifying enums by adding all option enums
422436 if schema .kind == DataRowMetadataKind .enum :
423- [deletes .add (o .id ) for o in schema .options ]
437+ [deletes .add (o .uid ) for o in schema .options ]
424438
425- deletes .add (schema .id )
439+ deletes .add (schema .uid )
426440
427441 return _DeleteBatchDataRowMetadata (
428442 data_row_id = delete .data_row_id ,
@@ -471,7 +485,7 @@ def _validate_enum_parse(
471485 schema : DataRowMetadataSchema ,
472486 field : DataRowMetadataField ) -> List [Dict [str , Union [SchemaId , dict ]]]:
473487 if schema .options :
474- if field .value not in {o .id for o in schema .options }:
488+ if field .value not in {o .uid for o in schema .options }:
475489 raise ValueError (
476490 f"Option `{ field .value } ` not found for { field .schema_id } " )
477491 else :
0 commit comments