Skip to content

Commit 4b061f8

Browse files
author
Matt Sokoloff
committed
fetch data row ids by external ids
1 parent f154a53 commit 4b061f8

File tree

2 files changed

+60
-0
lines changed

2 files changed

+60
-0
lines changed

labelbox/client.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# type: ignore
22
from datetime import datetime, timezone
33
import json
4+
from typing import List, Dict
5+
from collections import defaultdict
46

57
import logging
68
import mimetypes
@@ -658,3 +660,29 @@ def create_model(self, name, ontology_id):
658660
"ontologyId": ontology_id
659661
})
660662
return Model(self, result['createModel'])
663+
664+
def get_data_row_ids_for_external_ids(
        self, external_ids: List[str]) -> Dict[str, List[str]]:
    """
    Look up the data row ids that belong to each of the given external ids.

    The external ids are sent to the API in consecutive batches of 100 and
    the answers of all batches are merged into a single mapping.

    NOTE(review): the previous docstring stated "a max of 1500 items
    returned at a time". Nothing in this method enforces such a cap, so it
    is presumably a server-side limit -- confirm against the API.

    Args:
        external_ids: List of external ids to fetch data row ids for

    Returns:
        A mapping from external id to the list of data row ids reported
        for it. Only external ids that appear in a response get a key.
        The object returned is a ``defaultdict(list)``, so indexing it
        with an unknown external id yields (and stores) an empty list
        instead of raising ``KeyError``.
    """
    # Number of external ids sent with each request.
    batch_size = 100
    query = """query externalIdsToDataRowIdsPyApi($externalId_in: [String!]!){
externalIdsToDataRowIds(externalId_in: $externalId_in) { dataRowId externalId }
}
"""
    ids_by_external_id = defaultdict(list)
    for start in range(0, len(external_ids), batch_size):
        batch = external_ids[start:start + batch_size]
        response = self.execute(query, {'externalId_in': batch})
        # One external id may be reported several times (once per data
        # row), so the ids are accumulated rather than overwritten.
        for match in response['externalIdsToDataRowIds']:
            ids_by_external_id[match['externalId']].append(
                match['dataRowId'])
    return ids_by_external_id

tests/integration/test_data_rows.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
from tempfile import NamedTemporaryFile
2+
import uuid
3+
import time
24

35
import pytest
46
import requests
@@ -11,6 +13,36 @@ def test_get_data_row(datarow, client):
1113
assert client.get_data_row(datarow.uid)
1214

1315

16+
def test_lookup_data_rows(client, dataset):
    """Check external-id to data-row-id lookups for rows created in ``dataset``.

    Covers, in order: one external id with one data row, two external ids
    with one data row each, one external id shared by two data rows, an
    empty request, and an external id that matches nothing.
    """
    ext_id_a = str(uuid.uuid4())
    # Case 1: a single external id backed by a single data row.
    row_a = dataset.create_data_row(row_data="123", external_id=ext_id_a)
    found = client.get_data_row_ids_for_external_ids([ext_id_a])
    assert len(found) == 1
    assert found[ext_id_a][0] == row_a.uid

    # Case 2: two external ids, each backed by exactly one data row.
    ext_id_b = str(uuid.uuid4())
    row_b = dataset.create_data_row(row_data="123", external_id=ext_id_b)
    found = client.get_data_row_ids_for_external_ids([ext_id_a, ext_id_b])
    assert len(found) == 2
    assert all(len(row_ids) == 1 for row_ids in found.values())
    assert found[ext_id_a][0] == row_a.uid
    assert found[ext_id_b][0] == row_b.uid

    # Case 3: a second data row reuses an external id, so that id must now
    # resolve to two data rows (checked below in creation order).
    row_b_again = dataset.create_data_row(row_data="123",
                                          external_id=ext_id_b)
    found = client.get_data_row_ids_for_external_ids([ext_id_b])
    assert len(found) == 1
    assert len(found[ext_id_b]) == 2
    assert found[ext_id_b][0] == row_b.uid
    assert found[ext_id_b][1] == row_b_again.uid

    # Case 4: requesting no external ids gives an empty result.
    found = client.get_data_row_ids_for_external_ids([])
    assert len(found) == 0

    # Case 5: an external id that was never used gives an empty result.
    found = client.get_data_row_ids_for_external_ids([str(uuid.uuid4())])
    assert len(found) == 0
44+
45+
1446
def test_data_row_bulk_creation(dataset, rand_gen, image_url):
1547
client = dataset.client
1648
assert len(list(dataset.data_rows())) == 0

0 commit comments

Comments
 (0)