
Commit 065a6fa

Author: Kevin Kim (committed)

Create/Update/Delete custom metadata schema
1 parent 0197e0a commit 065a6fa

File tree

3 files changed: +313 -34 lines changed


labelbox/schema/data_row_metadata.py

Lines changed: 234 additions & 30 deletions
@@ -93,6 +93,19 @@ class _DeleteBatchDataRowMetadata(_CamelCaseMixin):
 _BatchFunction = Callable[[_BatchInputs], List[DataRowMetadataBatchResponse]]
 
 
+class _UpsertCustomMetadataSchemaEnumOptionInput(_CamelCaseMixin):
+    id: Optional[SchemaId]
+    name: constr(strip_whitespace=True, min_length=1, max_length=100)
+    kind: str
+
+
+class _UpsertCustomMetadataSchemaInput(_CamelCaseMixin):
+    id: Optional[SchemaId]
+    name: constr(strip_whitespace=True, min_length=1, max_length=100)
+    kind: str
+    options: Optional[List[_UpsertCustomMetadataSchemaEnumOptionInput]]
+
+
 class DataRowMetadataOntology:
     """ Ontology for data row metadata
 
@@ -125,6 +138,9 @@ def _build_ontology(self):
         self.reserved_by_name: Dict[
             str,
             DataRowMetadataSchema] = self._make_name_index(self.reserved_fields)
+        self.reserved_by_name_normalized: Dict[
+            str, DataRowMetadataSchema] = self._make_normalized_name_index(
+                self.reserved_fields)
 
         # custom fields
         self.custom_fields: List[DataRowMetadataSchema] = [
@@ -134,6 +150,9 @@ def _build_ontology(self):
         self.custom_by_name: Dict[
             str,
             DataRowMetadataSchema] = self._make_name_index(self.custom_fields)
+        self.custom_by_name_normalized: Dict[
+            str, DataRowMetadataSchema] = self._make_normalized_name_index(
+                self.custom_fields)
 
     @staticmethod
     def _make_name_index(fields: List[DataRowMetadataSchema]):
@@ -147,6 +166,13 @@ def _make_name_index(fields: List[DataRowMetadataSchema]):
             index[f.name] = f
         return index
 
+    @staticmethod
+    def _make_normalized_name_index(fields: List[DataRowMetadataSchema]):
+        index = {}
+        for f in fields:
+            index[f.name] = f
+        return index
+
     @staticmethod
     def _make_id_index(
             fields: List[DataRowMetadataSchema]
@@ -203,6 +229,137 @@ def refresh_ontology(self):
         self._raw_ontology = self._get_ontology()
         self._build_ontology()
 
+    def create_schema(self,
+                      name: str,
+                      kind: DataRowMetadataKind,
+                      options: List[str] = None):
+        """ Create metadata schema
+
+        >>> mdo.create_schema(name, kind, options)
+
+        Args:
+            name (str): Name of metadata schema
+            kind (DataRowMetadataKind): Kind of metadata schema as `DataRowMetadataKind`
+            options (List[str]): List of Enum options
+
+        Returns:
+            Created metadata schema as `DataRowMetadataSchema`
+
+        Raises:
+            KeyError: When provided name is not a valid custom metadata
+        """
+        if not isinstance(kind, DataRowMetadataKind):
+            raise ValueError(f"kind '{kind}' must be a `DataRowMetadataKind`")
+
+        upsert_schema = _UpsertCustomMetadataSchemaInput(name=name,
+                                                         kind=kind.value)
+        if options:
+            if kind != DataRowMetadataKind.enum:
+                raise ValueError(
+                    f"Kind '{kind}' must be an Enum, if Enum options are provided"
+                )
+            upsert_enum_options = [
+                _UpsertCustomMetadataSchemaEnumOptionInput(
+                    name=o, kind=DataRowMetadataKind.option.value)
+                for o in options
+            ]
+            upsert_schema.options = upsert_enum_options
+
+        return self._upsert_schema(upsert_schema)
+
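
A minimal usage sketch for the new create_schema method, assuming an authenticated labelbox Client whose get_data_row_metadata_ontology() returns the DataRowMetadataOntology above; the API key, schema name, and option names are placeholder values:

    from labelbox import Client
    from labelbox.schema.data_row_metadata import DataRowMetadataKind

    client = Client(api_key="<YOUR_API_KEY>")
    mdo = client.get_data_row_metadata_ontology()

    # Create an Enum schema with two options; kind must be a DataRowMetadataKind member,
    # and options are only accepted when kind is DataRowMetadataKind.enum.
    created = mdo.create_schema(name="priority",
                                kind=DataRowMetadataKind.enum,
                                options=["low", "high"])
    print(created.uid, created.kind, [o.name for o in created.options])
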
+    def update_schema(self, name: str, new_name: str):
+        """ Update metadata schema
+
+        >>> mdo.update_schema(name, new_name)
+
+        Args:
+            name (str): Current name of metadata schema
+            new_name (str): New name of metadata schema
+
+        Returns:
+            Updated metadata schema as `DataRowMetadataSchema`
+
+        Raises:
+            KeyError: When provided name is not a valid custom metadata
+        """
+        schema = self._validate_custom_schema_by_name(name)
+        upsert_schema = _UpsertCustomMetadataSchemaInput(id=schema.uid,
+                                                         name=new_name,
+                                                         kind=schema.kind.value)
+        if schema.options:
+            upsert_enum_options = [
+                _UpsertCustomMetadataSchemaEnumOptionInput(
+                    id=o.uid,
+                    name=o.name,
+                    kind=DataRowMetadataKind.option.value)
+                for o in schema.options
+            ]
+            upsert_schema.options = upsert_enum_options
+
+        return self._upsert_schema(upsert_schema)
+
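
Continuing the sketch above, a rename via update_schema (the names are placeholders); the lookup goes through _validate_custom_schema_by_name, which refreshes the cached ontology if the name is not yet indexed:

    # Rename the custom schema; existing Enum options keep their ids and names.
    updated = mdo.update_schema(name="priority", new_name="ticket_priority")
    print(updated.name)
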
+    def update_enum_option(self, name: str, option: str, new_option: str):
+        """ Update Enum metadata schema option
+
+        >>> mdo.update_enum_option(name, option, new_option)
+
+        Args:
+            name (str): Name of metadata schema to update
+            option (str): Name of Enum option to update
+            new_option (str): New name of Enum option
+
+        Returns:
+            Updated metadata schema as `DataRowMetadataSchema`
+
+        Raises:
+            KeyError: When provided name is not a valid custom metadata
+        """
+        schema = self._validate_custom_schema_by_name(name)
+        if schema.kind != DataRowMetadataKind.enum:
+            raise ValueError(
+                f"Updating Enum option is only supported for Enum metadata schema"
+            )
+
+        upsert_schema = _UpsertCustomMetadataSchemaInput(id=schema.uid,
+                                                         name=schema.name,
+                                                         kind=schema.kind.value)
+        upsert_enum_options = []
+        for o in schema.options:
+            enum_option = _UpsertCustomMetadataSchemaEnumOptionInput(
+                id=o.uid, name=o.name, kind=o.kind.value)
+            if enum_option.name == option:
+                enum_option.name = new_option
+            upsert_enum_options.append(enum_option)
+        upsert_schema.options = upsert_enum_options
+
+        return self._upsert_schema(upsert_schema)
+
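
A short sketch of update_enum_option, continuing from the renamed schema above (option names are placeholders); options other than the matched one are passed through unchanged:

    # Rename a single Enum option on the "ticket_priority" schema.
    updated = mdo.update_enum_option(name="ticket_priority",
                                     option="low",
                                     new_option="lowest")
    print([o.name for o in updated.options])
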
+    def delete_schema(self, name: str):
+        """ Delete metadata schema
+
+        >>> mdo.delete_schema(name)
+
+        Args:
+            name: Name of metadata schema to delete
+
+        Returns:
+            True if deletion is successful, False if unsuccessful
+
+        Raises:
+            KeyError: When provided name is not a valid custom metadata
+        """
+        schema = self._validate_custom_schema_by_name(name)
+        query = """mutation DeleteCustomMetadataSchemaPyApi($where: WhereUniqueIdInput!) {
+                deleteCustomMetadataSchema(schema: $where){
+                    success
+                }
+            }"""
+        res = self._client.execute(query, {'where': {
+            'id': schema.uid
+        }})['deleteCustomMetadataSchema']
+
+        return res['success']
+
     def parse_metadata(
             self, unparsed: List[Dict[str,
                                       List[Union[str,
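
And a sketch of delete_schema, again with a placeholder name; the return value is the success flag reported by the DeleteCustomMetadataSchemaPyApi mutation:

    # Delete the custom schema by name.
    deleted = mdo.delete_schema(name="ticket_priority")
    print(deleted)  # True if the deletion succeeded
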
@@ -248,7 +405,7 @@ def parse_metadata_fields(
 
         for f in unparsed:
             if f["schemaId"] not in self.fields_by_id:
-                # Update metadata ontology if field can't be found
+                # Fetch latest metadata ontology if metadata can't be found
                 self.refresh_ontology()
                 if f["schemaId"] not in self.fields_by_id:
                     raise ValueError(
@@ -422,13 +579,58 @@ def _bulk_export(_data_row_ids: List[str]) -> List[DataRowMetadata]:
             data_row_ids,
             batch_size=self._batch_size)
 
+    # Convert metadata to DataRowMetadataField objects, parse all fields
+    # and return a dictionary of metadata fields for upsert
+    def parse_upsert_metadata(self, metadata_fields):
+
+        def _convert_metadata_field(metadata_field):
+            if isinstance(metadata_field, DataRowMetadataField):
+                return metadata_field
+            elif isinstance(metadata_field, dict):
+                if not all(key in metadata_field
+                           for key in ("schema_id", "value")):
+                    raise ValueError(
+                        f"Custom metadata field '{metadata_field}' must have 'schema_id' and 'value' keys"
+                    )
+                return DataRowMetadataField(
+                    schema_id=metadata_field["schema_id"],
+                    value=metadata_field["value"])
+            else:
+                raise ValueError(
+                    f"Metadata field '{metadata_field}' is neither 'DataRowMetadataField' type or a dictionary"
+                )
+
+        # Convert all metadata fields to DataRowMetadataField type
+        metadata_fields = [_convert_metadata_field(m) for m in metadata_fields]
+        parsed_metadata = list(
+            chain.from_iterable(self._parse_upsert(m) for m in metadata_fields))
+        return [m.dict(by_alias=True) for m in parsed_metadata]
+
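
A sketch of the inputs parse_upsert_metadata accepts, assuming the reserved "tag" field exists in the ontology: items may be DataRowMetadataField objects or plain dicts carrying 'schema_id' and 'value' keys, and anything else raises ValueError.

    from labelbox.schema.data_row_metadata import DataRowMetadataField

    tag_schema = mdo.reserved_by_name["tag"]
    fields = [
        DataRowMetadataField(schema_id=tag_schema.uid, value="golden-set"),
        {"schema_id": tag_schema.uid, "value": "needs-review"},
    ]
    # Both forms are normalized to DataRowMetadataField before _parse_upsert runs,
    # and the result is a list of GraphQL-ready dictionaries.
    upserts = mdo.parse_upsert_metadata(fields)
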
+    def _upsert_schema(self, upsert_schema: _UpsertCustomMetadataSchemaInput):
+        query = """mutation UpsertCustomMetadataSchemaPyApi($data: UpsertCustomMetadataSchemaInput!) {
+            upsertCustomMetadataSchema(data: $data){
+                id
+                name
+                kind
+                options {
+                    id
+                    name
+                    kind
+                }
+            }
+        }"""
+        res = self._client.execute(
+            query, {"data": upsert_schema.dict(exclude_none=True)
+                   })['upsertCustomMetadataSchema']
+        return _parse_metadata_schema(res)
+
     def _parse_upsert(
             self, metadatum: DataRowMetadataField
     ) -> List[_UpsertDataRowMetadataInput]:
         """Format for metadata upserts to GQL"""
 
         if metadatum.schema_id not in self.fields_by_id:
-            # Update metadata ontology if field can't be found
+            # Fetch latest metadata ontology if metadata can't be found
             self.refresh_ontology()
             if metadatum.schema_id not in self.fields_by_id:
                 raise ValueError(
@@ -453,41 +655,14 @@ def _parse_upsert(
 
         return [_UpsertDataRowMetadataInput(**p) for p in parsed]
 
-    # Convert metadata to DataRowMetadataField objects, parse all fields
-    # and return a dictionary of metadata fields for upsert
-    def parse_upsert_metadata(self, metadata_fields):
-
-        def _convert_metadata_field(metadata_field):
-            if isinstance(metadata_field, DataRowMetadataField):
-                return metadata_field
-            elif isinstance(metadata_field, dict):
-                if not all(key in metadata_field
-                           for key in ("schema_id", "value")):
-                    raise ValueError(
-                        f"Custom metadata field '{metadata_field}' must have 'schema_id' and 'value' keys"
-                    )
-                return DataRowMetadataField(
-                    schema_id=metadata_field["schema_id"],
-                    value=metadata_field["value"])
-            else:
-                raise ValueError(
-                    f"Metadata field '{metadata_field}' is neither 'DataRowMetadataField' type or a dictionary"
-                )
-
-        # Convert all metadata fields to DataRowMetadataField type
-        metadata_fields = [_convert_metadata_field(m) for m in metadata_fields]
-        parsed_metadata = list(
-            chain.from_iterable(self._parse_upsert(m) for m in metadata_fields))
-        return [m.dict(by_alias=True) for m in parsed_metadata]
-
     def _validate_delete(self, delete: DeleteDataRowMetadata):
         if not len(delete.fields):
             raise ValueError(f"No fields specified for {delete.data_row_id}")
 
         deletes = set()
         for schema_id in delete.fields:
             if schema_id not in self.fields_by_id:
-                # Update metadata ontology if field can't be found
+                # Fetch latest metadata ontology if metadata can't be found
                 self.refresh_ontology()
                 if schema_id not in self.fields_by_id:
                     raise ValueError(
@@ -504,6 +679,16 @@ def _validate_delete(self, delete: DeleteDataRowMetadata):
             data_row_id=delete.data_row_id,
             schema_ids=list(delete.fields)).dict(by_alias=True)
 
+    def _validate_custom_schema_by_name(self,
+                                        name: str) -> DataRowMetadataSchema:
+        if name not in self.custom_by_name_normalized:
+            # Fetch latest metadata ontology if metadata can't be found
+            self.refresh_ontology()
+            if name not in self.custom_by_name_normalized:
+                raise KeyError(f"'{name}' is not a valid custom metadata")
+
+        return self.custom_by_name_normalized[name]
+
 
 def _batch_items(iterable: List[Any], size: int) -> Generator[Any, None, None]:
     l = len(iterable)
@@ -596,3 +781,22 @@ def _validate_enum_parse(
         "schemaId": field.value,
         "value": {}
     }]
+
+
+def _parse_metadata_schema(
+        unparsed: Dict[str, Union[str, List]]) -> DataRowMetadataSchema:
+    uid = unparsed['id']
+    name = unparsed['name']
+    kind = DataRowMetadataKind(unparsed['kind'])
+    options = [
+        DataRowMetadataSchema(uid=o['id'],
+                              name=o['name'],
+                              reserved=False,
+                              kind=DataRowMetadataKind.option,
+                              parent=uid) for o in unparsed['options']
+    ]
+    return DataRowMetadataSchema(uid=uid,
+                                 name=name,
+                                 reserved=False,
+                                 kind=kind,
+                                 options=options or None)
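
Note that the CRUD helpers above call _upsert_schema or the delete mutation directly and do not rebuild the cached name and id indexes; a refresh_ontology() call picks up the changes. A small sketch continuing the example (the schema name is a placeholder):

    # Rebuild the cached indexes so the deletion above is reflected locally.
    mdo.refresh_ontology()
    assert "ticket_priority" not in mdo.custom_by_name
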

tests/integration/conftest.py

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ def graphql_url(environ: str) -> str:
     if environ == Environ.PROD:
         return 'https://api.labelbox.com/graphql'
     elif environ == Environ.STAGING:
-        return 'https://staging-api.labelbox.com/graphql'
+        return 'https://api.lb-stage.xyz/graphql'
     elif environ == Environ.ONPREM:
         hostname = os.environ.get('LABELBOX_TEST_ONPREM_HOSTNAME', None)
         if hostname is None:
