Skip to content

Commit dbfd966

Browse files
committed
dataset.py: conversation bulk upload logic.
1 parent 0197e0a commit dbfd966

File tree

1 file changed

+41
-0
lines changed

1 file changed

+41
-0
lines changed

labelbox/schema/dataset.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ def _create_descriptor_file(self, items, max_attachments_per_data_row=None):
226226
>>> {DataRow.row_data:"/path/to/file1.jpg"},
227227
>>> "path/to/file2.jpg",
228228
>>> {"tileLayerUrl" : "http://", ...}
229+
>>> {"conversationalData" : [...], ...}
229230
>>> ])
230231
231232
For an example showing how to upload tiled data_rows see the following notebook:
@@ -280,6 +281,29 @@ def validate_attachments(item):
280281
)
281282
return attachments
282283

284+
def validate_conversational_data(conversational_data: list) -> None:
    """
    Checks each conversational message for keys expected as per https://docs.labelbox.com/reference/text-conversational#sample-conversational-json

    Args:
        conversational_data (list): list of dictionaries, one per message.

    Raises:
        ValueError: if conversational_data is not a list.
        KeyError: if any message contains a key outside the accepted set.
    """
    # Built once instead of once per message.
    accepted_message_keys = {
        "messageId", "timestampUsec", "content", "user", "align", "canLabel"
    }

    def check_message_keys(message):
        for key in message.keys():
            if key not in accepted_message_keys:
                # Sorted so the message is deterministic across runs.
                raise KeyError(
                    f"Invalid {key} key found! Accepted keys in messages list is {sorted(accepted_message_keys)}"
                )

    # Reject every non-list (including None and other falsy values) up front,
    # rather than letting them slip through or fail later with an unrelated
    # TypeError while iterating.
    if not isinstance(conversational_data, list):
        raise ValueError(
            f"conversationalData must be a list. Found {type(conversational_data)}"
        )

    for message in conversational_data:
        check_message_keys(message)
306+
283307
def parse_metadata_fields(item):
284308
metadata_fields = item.get('metadata_fields')
285309
if metadata_fields:
@@ -321,6 +345,23 @@ def convert_item(item):
321345
if "tileLayerUrl" in item:
322346
validate_attachments(item)
323347
return item
348+
349+
if "conversationalData" in item:
350+
messages = item.pop("conversationalData")
351+
validate_conversational_data(messages)
352+
validate_attachments(item)
353+
one_conversation = \
354+
{
355+
"type": item["type"],
356+
"version": item["version"],
357+
"messages": messages
358+
}
359+
conversationUrl = self.client.upload_data(json.dumps(one_conversation),
360+
content_type="application/json",
361+
filename="conversational_data.json")
362+
item["conversationalUrl"] = conversationUrl
363+
return item
364+
324365
# Convert all payload variations into the same dict format
325366
item = format_row(item)
326367
# Make sure required keys exist (and there are no extra keys)

0 commit comments

Comments
 (0)