Skip to content

Commit 7eda10d

Browse files
authored
Vb/bulk export metadata by gk sdk 415 (#1341)
2 parents 8584745 + 0da9028 commit 7eda10d

File tree

6 files changed

+75
-13
lines changed

6 files changed

+75
-13
lines changed

labelbox/client.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ def convert_value(value):
183183

184184
endpoint = self.endpoint if not experimental else self.endpoint.replace(
185185
"/graphql", "/_gql")
186+
186187
try:
187188
request = {
188189
'url': endpoint,

labelbox/schema/data_row_metadata.py

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,12 @@
33
from copy import deepcopy
44
from enum import Enum
55
from itertools import chain
6-
from typing import List, Optional, Dict, Union, Callable, Type, Any, Generator
6+
import warnings
7+
8+
from typing import List, Optional, Dict, Union, Callable, Type, Any, Generator, overload
79

810
from pydantic import BaseModel, conlist, constr
11+
from labelbox.schema.identifiables import DataRowIdentifiers, UniqueIds
912

1013
from labelbox.schema.ontology import SchemaId
1114
from labelbox.utils import _CamelCaseMixin, format_iso_datetime, format_iso_from_string
@@ -601,27 +604,44 @@ def _batch_delete(
601604
items,
602605
batch_size=self._batch_size)
603606

607+
@overload
604608
def bulk_export(self, data_row_ids: List[str]) -> List[DataRowMetadata]:
609+
pass
610+
611+
@overload
612+
def bulk_export(self,
613+
data_row_ids: DataRowIdentifiers) -> List[DataRowMetadata]:
614+
pass
615+
616+
def bulk_export(self, data_row_ids) -> List[DataRowMetadata]:
605617
""" Exports metadata for a list of data rows
606618
607619
>>> mdo.bulk_export([data_row.uid for data_row in data_rows])
608620
609621
Args:
610-
data_row_ids: List of data data rows to fetch metadata for
622+
data_row_ids: List of data rows to fetch metadata for. This can be a list of strings or a DataRowIdentifiers object.
623+
DataRowIdentifiers objects are lists of ids or global keys. A DataRowIdentifiers object can be a UniqueIds or GlobalKeys instance.
611624
Returns:
612625
A list of DataRowMetadata.
613626
There will be one DataRowMetadata for each data_row_id passed in.
614627
This is true even if the data row does not have any meta data.
615628
Data rows without metadata will have empty `fields`.
616629
617630
"""
618-
619631
if not len(data_row_ids):
620632
raise ValueError("Empty list passed")
621633

622-
def _bulk_export(_data_row_ids: List[str]) -> List[DataRowMetadata]:
623-
query = """query dataRowCustomMetadataPyApi($dataRowIds: [ID!]!) {
624-
dataRowCustomMetadata(where: {dataRowIds : $dataRowIds}) {
634+
if isinstance(data_row_ids,
635+
list) and len(data_row_ids) > 0 and isinstance(
636+
data_row_ids[0], str):
637+
data_row_ids = UniqueIds(data_row_ids)
638+
warnings.warn("Using data row ids will be deprecated. Please use "
639+
"UniqueIds or GlobalKeys instead.")
640+
641+
def _bulk_export(
642+
_data_row_ids: DataRowIdentifiers) -> List[DataRowMetadata]:
643+
query = """query dataRowCustomMetadataPyApi($dataRowIdentifiers: DataRowCustomMetadataDataRowIdentifiersInput) {
644+
dataRowCustomMetadata(where: {dataRowIdentifiers : $dataRowIdentifiers}) {
625645
dataRowId
626646
globalKey
627647
fields {
@@ -633,8 +653,12 @@ def _bulk_export(_data_row_ids: List[str]) -> List[DataRowMetadata]:
633653
"""
634654
return self.parse_metadata(
635655
self._client.execute(
636-
query,
637-
{"dataRowIds": _data_row_ids})['dataRowCustomMetadata'])
656+
query, {
657+
"dataRowIdentifiers": {
658+
"ids": [id for id in _data_row_ids],
659+
"idType": _data_row_ids.id_type
660+
}
661+
})['dataRowCustomMetadata'])
638662

639663
return _batch_operations(_bulk_export,
640664
data_row_ids,

labelbox/schema/identifiables.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,32 @@ def __init__(self, iterable, id_type: IdType):
1212
id_type: The type of id used to identify a data row.
1313
"""
1414
self._iterable = iterable
15-
self._index = 0
1615
self._id_type = id_type
1716

17+
@property
18+
def id_type(self):
19+
return self._id_type
20+
1821
def __iter__(self):
1922
return iter(self._iterable)
2023

24+
def __getitem__(self, index):
25+
if isinstance(index, slice):
26+
ids = self._iterable[index]
27+
return self.__class__(ids) # type: ignore
28+
return self._iterable[index]
29+
30+
def __len__(self):
31+
return len(self._iterable)
32+
2133
def __repr__(self) -> str:
2234
return f"{self.__class__.__name__}({self._iterable})"
2335

36+
def __eq__(self, other: object) -> bool:
37+
if not isinstance(other, Identifiables):
38+
return False
39+
return self._iterable == other._iterable and self._id_type == other._id_type
40+
2441

2542
class UniqueIds(Identifiables):
2643
"""

labelbox/schema/project.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1289,7 +1289,7 @@ def update_data_row_labeling_priority(
12891289
project_param: self.uid,
12901290
data_rows_param: {
12911291
"ids": [id for id in data_rows],
1292-
"idType": data_rows._id_type,
1292+
"idType": data_rows.id_type,
12931293
},
12941294
})["project"][method]
12951295

@@ -1484,7 +1484,7 @@ def move_data_rows_to_task_queue(self, data_row_ids, task_queue_id: str):
14841484
"queueId": task_queue_id,
14851485
"dataRowIdentifiers": {
14861486
"ids": [id for id in data_row_ids],
1487-
"idType": data_row_ids._id_type,
1487+
"idType": data_row_ids.id_type,
14881488
},
14891489
},
14901490
timeout=180.0,

tests/integration/test_data_row_metadata.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from labelbox.exceptions import MalformedQueryException
88
from labelbox.schema.data_row_metadata import DataRowMetadataField, DataRowMetadata, DataRowMetadataKind, DeleteDataRowMetadata, \
99
DataRowMetadataOntology, _parse_metadata_schema
10+
from labelbox.schema.identifiables import GlobalKeys, UniqueIds
1011

1112
INVALID_SCHEMA_ID = "1" * 25
1213
FAKE_SCHEMA_ID = "0" * 25
@@ -102,6 +103,16 @@ def test_bulk_export_datarow_metadata(data_row, mdo: DataRowMetadataOntology):
102103
assert exported[0].data_row_id == data_row.uid
103104
assert len([field for field in exported[0].fields]) == 3
104105

106+
exported = mdo.bulk_export(UniqueIds([data_row.uid]))
107+
assert exported[0].global_key == data_row.global_key
108+
assert exported[0].data_row_id == data_row.uid
109+
assert len([field for field in exported[0].fields]) == 3
110+
111+
exported = mdo.bulk_export(GlobalKeys([data_row.global_key]))
112+
assert exported[0].global_key == data_row.global_key
113+
assert exported[0].data_row_id == data_row.uid
114+
assert len([field for field in exported[0].fields]) == 3
115+
105116

106117
def test_get_datarow_metadata_ontology(mdo):
107118
assert len(mdo.fields)

tests/unit/test_unit_identifiables.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,23 @@ def test_unique_ids():
55
ids = ["a", "b", "c"]
66
identifiables = UniqueIds(ids)
77
assert [i for i in identifiables] == ids
8-
assert identifiables._id_type == "ID"
8+
assert identifiables.id_type == "ID"
9+
assert len(identifiables) == 3
910

1011

1112
def test_global_keys():
1213
ids = ["a", "b", "c"]
1314
identifiables = GlobalKeys(ids)
1415
assert [i for i in identifiables] == ids
15-
assert identifiables._id_type == "GKEY"
16+
assert identifiables.id_type == "GKEY"
17+
assert len(identifiables) == 3
18+
19+
20+
def test_index_access():
21+
ids = ["a", "b", "c"]
22+
identifiables = GlobalKeys(ids)
23+
assert identifiables[0] == "a"
24+
assert identifiables[1:3] == GlobalKeys(["b", "c"])
1625

1726

1827
def test_repr():

0 commit comments

Comments
 (0)