
Commit f6ce4b6

Author: Kevin Kim (committed)
Merge branch 'develop' of https://github.com/Labelbox/labelbox-python into kkim/AL-2219
2 parents: ddddc60 + 47063d7

14 files changed: 188 additions, 31 deletions

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
@@ -1,5 +1,14 @@
 # Changelog
 
+# Version 3.21.0
+## Added
+* Projects can be created with a `media_type`
+* Added `media_type` attribute to `Project`
+* New `MediaType` enumeration
+
+## Fix
+* Added back the mimetype to datarow bulk uploads for orgs that require delegated access
+
 # Version 3.20.1 (2022-05-02)
 ## Updated
 * Ontology Classification `scope` field is only set for top level classifications
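For context, a minimal sketch of the project-level change called out above. It assumes `MediaType` is importable from the package root and that `create_project` accepts the new keyword; the API key and project name are placeholders, not values from this commit:

```python
from labelbox import Client, MediaType

client = Client(api_key="<YOUR_API_KEY>")

# New in 3.21.0: tag the project with a media type at creation time.
project = client.create_project(name="image-annotation-project",
                                media_type=MediaType.Image)

print(project.media_type)  # the new `media_type` attribute on Project
```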

labelbox/__init__.py

Lines changed: 2 additions & 2 deletions
@@ -1,11 +1,11 @@
 name = "labelbox"
-__version__ = "3.20.1"
+__version__ = "3.21.0"
 
 import sys
 import warnings
 
 if sys.version_info < (3, 7):
-    warnings.warn("""Python 3.6 will no longer be actively supported
+    warnings.warn("""Python 3.6 will no longer be actively supported
 starting 06/01/2022. Please upgrade to Python 3.7 or higher.""")
 
 from labelbox.client import Client

labelbox/client.py

Lines changed: 4 additions & 1 deletion
@@ -90,6 +90,7 @@ def __init__(self,
             'Authorization': 'Bearer %s' % api_key,
             'X-User-Agent': f'python-sdk {SDK_VERSION}'
         }
+        self._data_row_metadata_ontology = None
 
     @retry.Retry(predicate=retry.if_exception_type(
         labelbox.exceptions.InternalServerError))
@@ -648,7 +649,9 @@ def get_data_row_metadata_ontology(self) -> DataRowMetadataOntology:
             DataRowMetadataOntology: The ontology for Data Row Metadata for an organization
 
         """
-        return DataRowMetadataOntology(self)
+        if self._data_row_metadata_ontology is None:
+            self._data_row_metadata_ontology = DataRowMetadataOntology(self)
+        return self._data_row_metadata_ontology
 
     def get_model(self, model_id) -> Model:
         """ Gets a single Model with the given ID.

labelbox/exceptions.py

Lines changed: 7 additions & 1 deletion
@@ -71,6 +71,12 @@ class InvalidQueryError(LabelboxError):
     pass
 
 
+class ResourceCreationError(LabelboxError):
+    """ Indicates that a resource could not be created in the server side
+    due to a validation or transaction error"""
+    pass
+
+
 class NetworkError(LabelboxError):
     """Raised when an HTTPError occurs."""
 
@@ -122,4 +128,4 @@ class MALValidationError(LabelboxError):
 
 class OperationNotAllowedException(Exception):
     """Raised when user does not have permissions to a resource or has exceeded usage limit"""
-    pass
+    pass
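A hedged sketch of how calling code might handle the new exception. This diff does not show where the SDK raises it, so the `create_data_row` trigger below is an assumption; ids and the URL are placeholders:

```python
from labelbox import Client
from labelbox.exceptions import ResourceCreationError

client = Client(api_key="<YOUR_API_KEY>")
dataset = client.get_dataset("<DATASET_ID>")

try:
    dataset.create_data_row(row_data="https://example.com/image.png")
except ResourceCreationError as err:
    # Server-side validation or transaction failure while creating the resource.
    print(f"Data row creation failed: {err}")
```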

labelbox/orm/model.py

Lines changed: 20 additions & 6 deletions
@@ -32,8 +32,9 @@ class Field:
     Attributes:
         field_type (Field.Type): The type of the field.
         name (str): name that the attribute has in client-side Python objects
-        grapgql_name (str): name that the attribute has in queries (and in
+        graphql_name (str): name that the attribute has in queries (and in
             server-side database definition).
+        result_subquery (str): graphql query result payload for a field.
     """
 
     class Type(Enum):
@@ -55,13 +56,23 @@ def name(self):
             return self.enum_cls.__name__
 
     class ListType:
+        """ Represents Field that is a list of some object.
+        Args:
+            list_cls (type): Type of object that list is made of.
+            graphql_type (str): Inner object's graphql type.
+                By default, the list_cls's name is used as the graphql type.
+        """
 
-        def __init__(self, list_cls: type):
+        def __init__(self, list_cls: type, graphql_type=None):
            self.list_cls = list_cls
+            if graphql_type is None:
+                self.graphql_type = self.list_cls.__name__
+            else:
+                self.graphql_type = graphql_type
 
         @property
         def name(self):
-            return self.list_cls.__name__
+            return f"[{self.graphql_type}]"
 
     class Order(Enum):
         """ Type of sort ordering. """
@@ -101,13 +112,14 @@ def Json(*args):
         return Field(Field.Type.Json, *args)
 
     @staticmethod
-    def List(list_cls: type, *args):
-        return Field(Field.ListType(list_cls), *args)
+    def List(list_cls: type, graphql_type=None, **kwargs):
+        return Field(Field.ListType(list_cls, graphql_type), **kwargs)
 
     def __init__(self,
                  field_type: Union[Type, EnumType, ListType],
                  name,
-                 graphql_name=None):
+                 graphql_name=None,
+                 result_subquery=None):
         """ Field init.
         Args:
             field_type (Field.Type): The type of the field.
@@ -116,12 +128,14 @@ def __init__(self,
             graphql_name (str): query and server-side name of a database object.
                 If None, it is constructed from the client-side name by converting
                 snake_case (Python convention) into camelCase (GraphQL convention).
+            result_subquery (str): graphql query result payload for a field.
         """
         self.field_type = field_type
         self.name = name
         if graphql_name is None:
             graphql_name = utils.camel_case(name)
         self.graphql_name = graphql_name
+        self.result_subquery = result_subquery
 
     @property
     def asc(self):
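`Field.ListType.name` now wraps the element's GraphQL type in list brackets, and any field can carry an explicit `result_subquery`. A small sketch against the patched ORM module (import paths taken from this diff; behavior mirrors the changes above):

```python
from labelbox.orm.model import Field
from labelbox.schema.data_row_metadata import DataRowMetadataField

# Default: the GraphQL element type falls back to the Python class name.
plain = Field.List(DataRowMetadataField, name="custom_metadata")
print(plain.field_type.name)   # [DataRowMetadataField]

# Explicit graphql_type and result_subquery, as DataRow now declares.
custom = Field.List(DataRowMetadataField,
                    graphql_type="DataRowCustomMetadataUpsertInput!",
                    name="custom_metadata",
                    result_subquery="customMetadata { value schemaId }")
print(custom.field_type.name)  # [DataRowCustomMetadataUpsertInput!]
print(custom.result_subquery)  # customMetadata { value schemaId }
```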

labelbox/orm/query.py

Lines changed: 3 additions & 2 deletions
@@ -42,8 +42,9 @@ def results_query_part(entity):
     """
     # Query for fields
     fields = [
-        field.graphql_name if field.graphql_name != "customMetadata" else
-        "customMetadata { value schemaId }" for field in entity.fields()
+        field.result_subquery
+        if field.result_subquery is not None else field.graphql_name
+        for field in entity.fields()
     ]
 
     # Query for cached relationships
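The selection logic is now driven by `result_subquery` instead of a hard-coded check for `customMetadata`. A standalone sketch of just that comprehension, using stand-in field objects rather than the real ORM classes:

```python
# Stand-in for labelbox.orm.model.Field; only the two attributes the
# comprehension reads are modeled here.
class FakeField:
    def __init__(self, graphql_name, result_subquery=None):
        self.graphql_name = graphql_name
        self.result_subquery = result_subquery


fields = [
    FakeField("mediaAttributes"),
    FakeField("customMetadata",
              result_subquery="customMetadata { value schemaId }"),
]

# Mirrors the new comprehension in results_query_part().
selection = [
    f.result_subquery if f.result_subquery is not None else f.graphql_name
    for f in fields
]
print(" ".join(selection))
# -> mediaAttributes customMetadata { value schemaId }
```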

labelbox/schema/batch.py

Lines changed: 4 additions & 2 deletions
@@ -104,8 +104,10 @@ def export_data_rows(self, timeout_seconds=120) -> Generator:
                 response = requests.get(download_url)
                 response.raise_for_status()
                 reader = ndjson.reader(StringIO(response.text))
-                return (
-                    Entity.DataRow(self.client, result) for result in reader)
+                # TODO: Update result to parse customMetadata when resolver returns
+                return (Entity.DataRow(self.client, {
+                    **result, 'customMetadata': []
+                }) for result in reader)
             elif res["status"] == "FAILED":
                 raise LabelboxError("Data row export failed.")

labelbox/schema/data_row.py

Lines changed: 9 additions & 5 deletions
@@ -1,11 +1,10 @@
 import logging
-from datetime import datetime
-from typing import List, Dict, Union, TYPE_CHECKING
+from typing import TYPE_CHECKING
 
 from labelbox.orm import query
 from labelbox.orm.db_object import DbObject, Updateable, BulkDeletable
 from labelbox.orm.model import Entity, Field, Relationship
-from labelbox.schema.data_row_metadata import DataRowMetadataField
+from labelbox.schema.data_row_metadata import DataRowMetadataField  # type: ignore
 
 if TYPE_CHECKING:
     from labelbox import AssetAttachment
@@ -23,6 +22,7 @@ class DataRow(DbObject, Updateable, BulkDeletable):
         updated_at (datetime)
         created_at (datetime)
         media_attributes (dict): generated media attributes for the datarow
+        custom_metadata (list): metadata associated with the datarow
 
         dataset (Relationship): `ToOne` relationship to Dataset
         created_by (Relationship): `ToOne` relationship to User
@@ -35,7 +35,11 @@ class DataRow(DbObject, Updateable, BulkDeletable):
     updated_at = Field.DateTime("updated_at")
     created_at = Field.DateTime("created_at")
     media_attributes = Field.Json("media_attributes")
-    custom_metadata = Field.List(DataRowMetadataField, "custom_metadata")
+    custom_metadata = Field.List(
+        DataRowMetadataField,
+        graphql_type="DataRowCustomMetadataUpsertInput!",
+        name="custom_metadata",
+        result_subquery="customMetadata { value schemaId }")
 
     # Relationships
     dataset = Relationship.ToOne("Dataset")
@@ -97,4 +101,4 @@ def create_attachment(self, attachment_type,
             data_row_id_param: self.uid
         })
         return Entity.AssetAttachment(self.client,
-                                      res["createDataRowAttachment"])
+                                      res["createDataRowAttachment"])

labelbox/schema/data_row_metadata.py

Lines changed: 22 additions & 5 deletions
@@ -200,6 +200,10 @@ def _parse_ontology(raw_ontology) -> List[DataRowMetadataSchema]:
 
         return fields
 
+    def refresh_ontology(self):
+        self._raw_ontology = self._get_ontology()
+        self._build_ontology()
+
     def parse_metadata(
             self, unparsed: List[Dict[str,
                                       List[Union[str,
@@ -221,6 +225,14 @@ def parse_metadata(
         for dr in unparsed:
             fields = []
             for f in dr["fields"]:
+                if f["schemaId"] not in self.fields_by_id:
+                    # Update metadata ontology if field can't be found
+                    self.refresh_ontology()
+                    if f["schemaId"] not in self.fields_by_id:
+                        raise ValueError(
+                            f"Schema Id `{f['schemaId']}` not found in ontology"
+                        )
+
                 schema = self.fields_by_id[f["schemaId"]]
                 if schema.kind == DataRowMetadataKind.enum:
                     continue
@@ -297,7 +309,6 @@ def _batch_upsert(
                         chain.from_iterable(
                             self.parse_upsert(m) for m in m.fields))).dict(
                                 by_alias=True))
-
         res = _batch_operations(_batch_upsert, items, self._batch_size)
         return res
 
@@ -399,8 +410,11 @@ def parse_upsert(
         """Format for metadata upserts to GQL"""
 
         if metadatum.schema_id not in self.fields_by_id:
-            raise ValueError(
-                f"Schema Id `{metadatum.schema_id}` not found in ontology")
+            # Update metadata ontology if field can't be found
+            self.refresh_ontology()
+            if metadatum.schema_id not in self.fields_by_id:
+                raise ValueError(
+                    f"Schema Id `{metadatum.schema_id}` not found in ontology")
 
         schema = self.fields_by_id[metadatum.schema_id]
 
@@ -428,8 +442,11 @@ def _validate_delete(self, delete: DeleteDataRowMetadata):
         deletes = set()
         for schema_id in delete.fields:
             if schema_id not in self.fields_by_id:
-                raise ValueError(
-                    f"Schema Id `{schema_id}` not found in ontology")
+                # Update metadata ontology if field can't be found
+                self.refresh_ontology()
+                if schema_id not in self.fields_by_id:
+                    raise ValueError(
+                        f"Schema Id `{schema_id}` not found in ontology")
 
             schema = self.fields_by_id[schema_id]
             # handle users specifying enums by adding all option enums
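The practical effect: a schema id created after the ontology was first cached no longer fails immediately, because the ontology is refreshed once before a `ValueError` is raised. A hedged usage sketch with placeholder ids and value:

```python
from labelbox import Client
from labelbox.schema.data_row_metadata import (DataRowMetadata,
                                               DataRowMetadataField)

client = Client(api_key="<YOUR_API_KEY>")
mdo = client.get_data_row_metadata_ontology()

metadata = DataRowMetadata(
    data_row_id="<DATA_ROW_ID>",
    fields=[
        DataRowMetadataField(schema_id="<RECENTLY_CREATED_SCHEMA_ID>",
                             value="captured-2022-05")
    ])

# If the schema id is missing from the cached ontology, the ontology is now
# refreshed once before the upsert gives up.
mdo.bulk_upsert([metadata])
```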

labelbox/schema/dataset.py

Lines changed: 19 additions & 4 deletions
@@ -79,6 +79,18 @@ def create_data_row(self, **kwargs) -> "DataRow":
         if os.path.exists(row_data):
             kwargs[DataRow.row_data.name] = self.client.upload_file(row_data)
         kwargs[DataRow.dataset.name] = self
+
+        # Parse metadata fields, if they are provided
+        if DataRow.custom_metadata.name in kwargs:
+            mdo = self.client.get_data_row_metadata_ontology()
+            metadata_fields = kwargs[DataRow.custom_metadata.name]
+            metadata = list(
+                chain.from_iterable(
+                    mdo.parse_upsert(m) for m in metadata_fields))
+            kwargs[DataRow.custom_metadata.name] = [
+                md.dict(by_alias=True) for md in metadata
+            ]
+
         return self.client._create(DataRow, kwargs)
 
     def create_data_rows_sync(self, items) -> None:
@@ -344,8 +356,9 @@ def convert_item(item):
         items = [future.result() for future in as_completed(futures)]
         # Prepare and upload the desciptor file
         data = json.dumps(items)
-        #print(f"!! input data: {data}")
-        return self.client.upload_data(data)
+        return self.client.upload_data(data,
+                                       content_type="application/json",
+                                       filename="json_import.json")
 
     def data_rows_for_external_id(self,
                                   external_id,
@@ -426,8 +439,10 @@ def export_data_rows(self, timeout_seconds=120) -> Generator:
                 response = requests.get(download_url)
                 response.raise_for_status()
                 reader = ndjson.reader(StringIO(response.text))
-                return (
-                    Entity.DataRow(self.client, result) for result in reader)
+                # TODO: Update result to parse customMetadata when resolver returns
+                return (Entity.DataRow(self.client, {
+                    **result, 'customMetadata': []
+                }) for result in reader)
             elif res["status"] == "FAILED":
                 raise LabelboxError("Data row export failed.")
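With this change `create_data_row` accepts metadata directly, running it through the metadata ontology's `parse_upsert` before the row is created. A hedged sketch of the new call; the dataset id, schema id, URL, and value are placeholders:

```python
from labelbox import Client
from labelbox.schema.data_row_metadata import DataRowMetadataField

client = Client(api_key="<YOUR_API_KEY>")
dataset = client.get_dataset("<DATASET_ID>")

# custom_metadata is parsed via the metadata ontology before the create call.
data_row = dataset.create_data_row(
    row_data="https://example.com/image.png",
    custom_metadata=[
        DataRowMetadataField(schema_id="<METADATA_SCHEMA_ID>",
                             value="captured-2022-05")
    ])
```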
