Skip to content

Commit 0be6ef0

Browse files
authored
Merge pull request #637 from Labelbox/jtso/al-3045
[AL-3045] Max Datarow Upload Per Operation
2 parents: 2f4db88 + d51e6ef; commit: 0be6ef0

File tree

2 files changed

+23
-5
lines changed

2 files changed

+23
-5
lines changed

labelbox/schema/dataset.py

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15,12 +15,16 @@
1515
from labelbox.exceptions import InvalidQueryError, LabelboxError, ResourceNotFoundError, InvalidAttributeError
1616
from labelbox.orm.db_object import DbObject, Updateable, Deletable
1717
from labelbox.orm.model import Entity, Field, Relationship
18+
from labelbox.exceptions import MalformedQueryException
1819

1920
if TYPE_CHECKING:
2021
from labelbox import Task, User, DataRow
2122

2223
logger = logging.getLogger(__name__)
2324

25+
MAX_DATAROW_PER_API_OPERATION = 150000
26+
MAX_DATAROW_WITH_METADATA = 30000
27+
2428

2529
class Dataset(DbObject, Updateable, Deletable):
2630
""" A Dataset is a collection of DataRows.
@@ -391,14 +395,18 @@ def convert_item(item):
391395
f"Must pass an iterable to create_data_rows. Found {type(items)}"
392396
)
393397

398+
if len(items) > MAX_DATAROW_PER_API_OPERATION:
399+
raise MalformedQueryException(
400+
f"Cannot create more than {MAX_DATAROW_PER_API_OPERATION} DataRows per function call."
401+
)
402+
394403
# TODO: If any datarows contain metadata, we're limiting max # of datarows
395404
# until we address performance issues with datarow create with metadata
396-
max_datarow_with_metadata = 30_000
397-
if (len(items) > max_datarow_with_metadata):
405+
if len(items) > MAX_DATAROW_WITH_METADATA:
398406
for row in items:
399407
if 'metadata_fields' in row:
400-
raise ValueError(
401-
f"Cannot create more than {max_datarow_with_metadata} DataRows, if any DataRows contain metadata"
408+
raise MalformedQueryException(
409+
f"Cannot create more than {MAX_DATAROW_WITH_METADATA} DataRows, if any DataRows contain metadata"
402410
)
403411

404412
with ThreadPoolExecutor(file_upload_thread_count) as executor:

tests/integration/test_dataset.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,8 @@
22
import pytest
33
import requests
44
from labelbox import Dataset
5-
from labelbox.exceptions import ResourceNotFoundError
5+
from labelbox.exceptions import ResourceNotFoundError, MalformedQueryException
6+
from labelbox.schema.dataset import MAX_DATAROW_PER_API_OPERATION
67

78

89
def test_dataset(client, rand_gen):
@@ -137,3 +138,12 @@ def test_create_descriptor_file(dataset):
137138
'content_type': 'application/json',
138139
'filename': 'json_import.json'
139140
}
141+
142+
143+
def test_max_dataset_datarow_upload(dataset, image_url, rand_gen):
144+
external_id = str(rand_gen)
145+
items = [dict(row_data=image_url, external_id=external_id)
146+
] * (MAX_DATAROW_PER_API_OPERATION + 1)
147+
148+
with pytest.raises(MalformedQueryException):
149+
dataset.create_data_rows(items)

0 commit comments

Comments
 (0)