Skip to content

Commit 14e2338

Browse files
author
Kevin Kim
committed
Remove 'AssignGlobalKeyToDataRowInput', change timeout mechanism, handle 'FAILED' job, rename vars
1 parent 6ebc2ea commit 14e2338

File tree

5 files changed

+119
-82
lines changed

5 files changed

+119
-82
lines changed

labelbox/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,4 @@
2626
from labelbox.schema.iam_integration import IAMIntegration
2727
from labelbox.schema.resource_tag import ResourceTag
2828
from labelbox.schema.project_resource_tag import ProjectResourceTag
29-
from labelbox.schema.media_type import MediaType
30-
from labelbox.schema.global_key import AssignGlobalKeyToDataRowInput
29+
from labelbox.schema.media_type import MediaType

labelbox/client.py

Lines changed: 89 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
from labelbox.schema.user import User
3333
from labelbox.schema.project import Project
3434
from labelbox.schema.role import Role
35-
from labelbox.schema.global_key import AssignGlobalKeyToDataRowInput
3635

3736
from labelbox.schema.media_type import MediaType
3837

@@ -944,39 +943,48 @@ def get_model_run(self, model_run_id: str) -> ModelRun:
944943

945944
def assign_global_keys_to_data_rows(
946945
self,
947-
global_key_to_data_row_inputs: List[AssignGlobalKeyToDataRowInput],
948-
timeout=30) -> List[Dict[str, str]]:
946+
global_key_to_data_row_inputs: List[Dict[str, str]],
947+
timeout_seconds=60) -> List[Dict[str, str]]:
949948
"""
950949
Assigns global keys to the related data rows.
951950
952951
>>> global_key_data_row_inputs = [
953-
AssignGlobalKeyToDataRowInput(data_row_id = "cl7asgri20yvo075b4vtfedjb", global_key = "key1"),
954-
AssignGlobalKeyToDataRowInput(data_row_id = "cl7asgri10yvg075b4pz176ht", global_key = "key2")
955-
]
952+
{"data_row_id": "cl7asgri20yvo075b4vtfedjb", "global_key": "key1"},
953+
{"data_row_id": "cl7asgri10yvg075b4pz176ht", "global_key": "key2"},
954+
]
956955
>>> client.assign_global_keys_to_data_rows(global_key_data_row_inputs)
957956
958957
Args:
959-
A list of AssignGlobalKeyToDataRowInput objects.
958+
global_key_to_data_row_inputs: A list of dicts, each containing both a `data_row_id` and a `global_key`.
960959
Returns:
961960
A list of the successfully assigned global keys and data rows.
962961
"""
962+
validation_errors = []
963+
for input in global_key_to_data_row_inputs:
964+
if "data_row_id" not in input or "global_key" not in input:
965+
validation_errors.append(input)
966+
967+
if len(validation_errors) > 0:
968+
raise ValueError(
969+
f"Must provide a list of dicts containing both `data_row_id` and `global_key`. The following dict(s) are invalid: {validation_errors}."
970+
)
963971

964-
mutation_str = """mutation assignGlobalKeysToDataRowsPyApi($globalKeyDataRowLinks: [AssignGlobalKeyToDataRowInput!]!) {
972+
query_str = """mutation assignGlobalKeysToDataRowsPyApi($globalKeyDataRowLinks: [AssignGlobalKeyToDataRowInput!]!) {
965973
assignGlobalKeysToDataRows(data: {assignInputs: $globalKeyDataRowLinks}) {
966974
jobId
967975
}
968976
}
969977
"""
970-
mutation_params = {
971-
'globalKeyDataRowLinks': [
972-
input.dict(by_alias=True)
973-
for input in global_key_to_data_row_inputs
974-
]
978+
979+
params = {
980+
'globalKeyDataRowLinks': [{
981+
utils.camel_case(key): value for key, value in input.items()
982+
} for input in global_key_to_data_row_inputs]
975983
}
976-
assign_global_keys_to_data_rows_job = self.execute(
977-
mutation_str, mutation_params)
978984

979-
get_failed_assignments_str = """query getDataRowsForGlobalKeysPyApi($jobId: ID!) {
985+
assign_global_keys_to_data_rows_job = self.execute(query_str, params)
986+
987+
result_query_str = """query assignGlobalKeysToDataRowsResultPyApi($jobId: ID!) {
980988
assignGlobalKeysToDataRowsResult(jobId: {id: $jobId}) {
981989
jobStatus
982990
data {
@@ -998,15 +1006,15 @@ def assign_global_keys_to_data_rows(
9981006
}
9991007
}}}
10001008
"""
1001-
get_failed_assignments_params = {
1009+
result_params = {
10021010
"jobId":
10031011
assign_global_keys_to_data_rows_job["assignGlobalKeysToDataRows"
10041012
]["jobId"]
10051013
}
1006-
1007-
while timeout >= 0:
1008-
res = self.execute(get_failed_assignments_str,
1009-
get_failed_assignments_params)
1014+
sleep_time = 2
1015+
start_time = time.time()
1016+
while True:
1017+
res = self.execute(result_query_str, result_params)
10101018
if res["assignGlobalKeysToDataRowsResult"][
10111019
"jobStatus"] == "COMPLETE":
10121020
errors = []
@@ -1017,60 +1025,95 @@ def assign_global_keys_to_data_rows(
10171025
if res['accessDeniedAssignments']:
10181026
errors.append("Access Denied Assignments: " +
10191027
str(res['accessDeniedAssignments']))
1028+
success = []
1029+
if res['sanitizedAssignments']:
1030+
success.append("Sanitized Assignments: " +
1031+
str(res['sanitizedAssignments']))
1032+
if res['unmodifiedAssignments']:
1033+
success.append("Unmodified Assignments: " +
1034+
str(res['unmodifiedAssignments']))
1035+
10201036
if len(errors) > 0:
10211037
raise Exception(
10221038
"Failed to assign global keys to data rows: " +
1023-
str(errors))
1039+
str(errors) + "\n" + str(success))
10241040
return res['sanitizedAssignments'] + res['unmodifiedAssignments']
1025-
time.sleep(2)
1026-
timeout -= 2
1027-
1028-
raise labelbox.exceptions.TimeoutError(
1029-
"Timed out waiting for assign global keys to data rows job to complete."
1030-
)
1041+
elif res["assignGlobalKeysToDataRowsResult"][
1042+
"jobStatus"] == "FAILED":
1043+
raise labelbox.exceptions.LabelboxError(
1044+
"Job assign_global_keys_to_data_rows failed.")
1045+
current_time = time.time()
1046+
if current_time - start_time > timeout_seconds:
1047+
raise labelbox.exceptions.TimeoutError(
1048+
"Timed out waiting for assign_global_keys_to_data_rows job to complete."
1049+
)
1050+
time.sleep(sleep_time)
10311051

1032-
def get_data_row_ids_for_global_keys(self,
1033-
global_keys: List[str],
1034-
timeout=30) -> List[Dict[str, str]]:
1052+
def get_data_rows_for_global_keys(
1053+
self,
1054+
global_keys: List[str],
1055+
timeout_seconds=60) -> List[Dict[str, str]]:
10351056
"""
1036-
Gets data row ids for a list of global keys.
1057+
Gets data rows for a list of global keys.
10371058
1038-
>>> data_row_ids = client.get_data_row_ids_for_global_keys(["key1",])
1059+
>>> data_rows = client.get_data_rows_for_global_keys(["key1",])
10391060
10401061
Args:
10411062
A list of global keys
10421063
Returns:
1043-
A list of data row ids. Returns empty if the global keys are not found.
1064+
A list of dicts, one per fetched data row, each containing the data row `id`.
10441065
"""
10451066

1046-
get_job_query_str = """query getDataRowsForGlobalKeysJobPyApi($globalKeys: [ID!]!) {
1067+
query_str = """query getDataRowsForGlobalKeysPyApi($globalKeys: [ID!]!) {
10471068
dataRowsForGlobalKeys(where: {ids: $globalKeys}) { jobId}}
10481069
"""
1049-
get_job_params = {"globalKeys": global_keys}
1070+
params = {"globalKeys": global_keys}
10501071

1051-
data_rows_for_global_keys_job = self.execute(get_job_query_str,
1052-
get_job_params)
1072+
data_rows_for_global_keys_job = self.execute(query_str, params)
10531073

1054-
get_data_rows_str = """query getDataRowsForGlobalKeysPyApi($jobId: ID!) {
1074+
result_query_str = """query getDataRowsForGlobalKeysResultPyApi($jobId: ID!) {
10551075
dataRowsForGlobalKeysResult(jobId: {id: $jobId}) { data {
10561076
fetchedDataRows {id}
10571077
notFoundGlobalKeys
10581078
accessDeniedGlobalKeys
10591079
deletedDataRowGlobalKeys
10601080
} jobStatus}}
10611081
"""
1062-
get_data_rows_params = {
1082+
result_params = {
10631083
"jobId":
10641084
data_rows_for_global_keys_job["dataRowsForGlobalKeys"]["jobId"]
10651085
}
10661086

1067-
while timeout >= 0:
1068-
res = self.execute(get_data_rows_str, get_data_rows_params)
1087+
sleep_time = 2
1088+
start_time = time.time()
1089+
while True:
1090+
res = self.execute(result_query_str, result_params)
10691091
if res["dataRowsForGlobalKeysResult"]['jobStatus'] == "COMPLETE":
10701092
return res["dataRowsForGlobalKeysResult"]['data'][
10711093
'fetchedDataRows']
1072-
time.sleep(2)
1073-
timeout -= 2
1094+
elif res["dataRowsForGlobalKeysResult"]['jobStatus'] == "FAILED":
1095+
raise labelbox.exceptions.LabelboxError(
1096+
"Job get_data_rows_for_global_keys failed.")
1097+
current_time = time.time()
1098+
if current_time - start_time > timeout_seconds:
1099+
raise labelbox.exceptions.TimeoutError(
1100+
"Timed out waiting for get_data_rows_for_global_keys job to complete."
1101+
)
1102+
time.sleep(sleep_time)
1103+
1104+
def get_data_row_ids_for_global_keys(
1105+
self,
1106+
global_keys: List[str],
1107+
timeout_seconds=60) -> List[Dict[str, str]]:
1108+
"""
1109+
Gets data row ids for a list of global keys.
10741110
1075-
raise labelbox.exceptions.TimeoutError(
1076-
"Timed out waiting for data rows for global keys job to complete.")
1111+
>>> data_row_ids = client.get_data_row_ids_for_global_keys(["key1",])
1112+
1113+
Args:
1114+
A list of global keys
1115+
Returns:
1116+
Currently the unmodified result of get_data_rows_for_global_keys: a list of dicts, each containing a data row `id`.
1117+
"""
1118+
# TODO: Extract the data row ids from the get_data_rows_for_global_keys result instead of returning it unchanged
1119+
return self.get_data_rows_for_global_keys(global_keys, timeout_seconds)

labelbox/schema/global_key.py

Lines changed: 0 additions & 15 deletions
This file was deleted.

tests/integration/test_assign_global_key_to_data_row.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
from labelbox.schema.global_key import AssignGlobalKeyToDataRowInput
2-
31
import uuid
2+
import pytest
43

54

65
def test_assign_global_keys_to_data_rows(client, dataset, image_url):
@@ -15,14 +14,38 @@ def test_assign_global_keys_to_data_rows(client, dataset, image_url):
1514
gk_1 = str(uuid.uuid4())
1615
gk_2 = str(uuid.uuid4())
1716

18-
assignment_inputs = [
19-
AssignGlobalKeyToDataRowInput(data_row_id=dr_1.uid, global_key=gk_1),
20-
AssignGlobalKeyToDataRowInput(data_row_id=dr_2.uid, global_key=gk_2)
21-
]
17+
assignment_inputs = [{
18+
"data_row_id": dr_1.uid,
19+
"global_key": gk_1
20+
}, {
21+
"data_row_id": dr_2.uid,
22+
"global_key": gk_2
23+
}]
2224
client.assign_global_keys_to_data_rows(assignment_inputs)
2325

2426
res = client.get_data_row_ids_for_global_keys([gk_1, gk_2])
2527

2628
assert len(res) == 2
2729
successful_assignments = set(a['id'] for a in res)
2830
assert successful_assignments == row_ids
31+
32+
33+
def test_assign_global_keys_to_data_rows_validation_error(client):
34+
assignment_inputs = [{
35+
"data_row_id": "test uid",
36+
"wrong_key": "gk 1"
37+
}, {
38+
"data_row_id": "test uid 2",
39+
"global_key": "gk 2"
40+
}, {
41+
"wrong_key": "test uid 3",
42+
"global_key": "gk 3"
43+
}, {
44+
"data_row_id": "test uid 4"
45+
}, {
46+
"global_key": "gk 5"
47+
}, {}]
48+
with pytest.raises(ValueError) as excinfo:
49+
client.assign_global_keys_to_data_rows(assignment_inputs)
50+
e = """[{'data_row_id': 'test uid', 'wrong_key': 'gk 1'}, {'wrong_key': 'test uid 3', 'global_key': 'gk 3'}, {'data_row_id': 'test uid 4'}, {'global_key': 'gk 5'}, {}]"""
51+
assert e in str(excinfo.value)

tests/unit/test_global_key_to_data_row_input.py

Lines changed: 0 additions & 13 deletions
This file was deleted.

0 commit comments

Comments
 (0)