[PTDT-1867] Add LLM data generation label types (#1317)

lgluszek · web-flow · commit 3f894f0dbc4f · 2023-12-08T19:18:06.000+01:00
diff --git a/labelbox/data/annotation_types/__init__.py b/labelbox/data/annotation_types/__init__.py
@@ -42,6 +42,9 @@
 from .data import MaskData
 from .data import TextData
 from .data import VideoData
+from .data import LlmPromptResponseCreationData
+from .data import LlmPromptCreationData
+from .data import LlmResponseCreationData
 
 from .label import Label
 from .collection import LabelList
diff --git a/labelbox/data/annotation_types/data/__init__.py b/labelbox/data/annotation_types/data/__init__.py
@@ -6,4 +6,7 @@
 from .raster import ImageData
 from .raster import MaskData
 from .text import TextData
-from .video import VideoData
+from .video import VideoData
+from .llm_prompt_response_creation import LlmPromptResponseCreationData
+from .llm_prompt_creation import LlmPromptCreationData
+from .llm_response_creation import LlmResponseCreationData
diff --git a/labelbox/data/annotation_types/data/llm_prompt_creation.py b/labelbox/data/annotation_types/data/llm_prompt_creation.py
@@ -0,0 +1,7 @@
+from labelbox.typing_imports import Literal
+from labelbox.utils import _NoCoercionMixin
+from .base_data import BaseData
+
+
+class LlmPromptCreationData(BaseData, _NoCoercionMixin):
+    class_name: Literal["LlmPromptCreationData"] = "LlmPromptCreationData"
diff --git a/labelbox/data/annotation_types/data/llm_prompt_response_creation.py b/labelbox/data/annotation_types/data/llm_prompt_response_creation.py
@@ -0,0 +1,8 @@
+from labelbox.typing_imports import Literal
+from labelbox.utils import _NoCoercionMixin
+from .base_data import BaseData
+
+
+class LlmPromptResponseCreationData(BaseData, _NoCoercionMixin):
+    class_name: Literal[
+        "LlmPromptResponseCreationData"] = "LlmPromptResponseCreationData"
diff --git a/labelbox/data/annotation_types/data/llm_response_creation.py b/labelbox/data/annotation_types/data/llm_response_creation.py
@@ -0,0 +1,7 @@
+from labelbox.typing_imports import Literal
+from labelbox.utils import _NoCoercionMixin
+from .base_data import BaseData
+
+
+class LlmResponseCreationData(BaseData, _NoCoercionMixin):
+    class_name: Literal["LlmResponseCreationData"] = "LlmResponseCreationData"
diff --git a/labelbox/data/annotation_types/label.py b/labelbox/data/annotation_types/label.py
@@ -10,7 +10,7 @@
 from .annotation import ClassificationAnnotation, ObjectAnnotation
 from .relationship import RelationshipAnnotation
 from .classification import ClassificationAnswer
-from .data import AudioData, ConversationData, DicomData, DocumentData, HTMLData, ImageData, MaskData, TextData, VideoData
+from .data import AudioData, ConversationData, DicomData, DocumentData, HTMLData, ImageData, MaskData, TextData, VideoData, LlmPromptCreationData, LlmPromptResponseCreationData, LlmResponseCreationData
 from .geometry import Mask
 from .metrics import ScalarMetric, ConfusionMatrixMetric
 from .types import Cuid
@@ -19,7 +19,9 @@
 from ..ontology import get_feature_schema_lookup
 
 DataType = Union[VideoData, ImageData, TextData, TiledImageData, AudioData,
-                 ConversationData, DicomData, DocumentData, HTMLData]
+                 ConversationData, DicomData, DocumentData, HTMLData,
+                 LlmPromptCreationData, LlmPromptResponseCreationData,
+                 LlmResponseCreationData]
 
 
 class Label(BaseModel):
diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py
@@ -122,11 +122,35 @@ def text_data_row(rand_gen):
     }
 
 
+@pytest.fixture()
+def llm_prompt_creation_data_row(rand_gen):
+    return {
+        "row_data": {
+            "type": "application/llm.prompt-creation",
+            "version": 1
+        },
+        "global_key": rand_gen(str)
+    }
+
+
+@pytest.fixture()
+def llm_prompt_response_data_row(rand_gen):
+    return {
+        "row_data": {
+            "type": "application/llm.prompt-response-creation",
+            "version": 1
+        },
+        "global_key": rand_gen(str)
+    }
+
+
 @pytest.fixture
 def data_row_json_by_data_type(audio_data_row, conversation_data_row,
                                dicom_data_row, geospatial_data_row,
                                html_data_row, image_data_row, document_data_row,
-                               text_data_row, video_data_row):
+                               text_data_row, video_data_row,
+                               llm_prompt_creation_data_row,
+                               llm_prompt_response_data_row):
     return {
         'audio': audio_data_row,
         'conversation': conversation_data_row,
@@ -137,6 +161,9 @@ def data_row_json_by_data_type(audio_data_row, conversation_data_row,
         'document': document_data_row,
         'text': text_data_row,
         'video': video_data_row,
+        'llmpromptcreation': llm_prompt_creation_data_row,
+        'llmpromptresponsecreation': llm_prompt_response_data_row,
+        'llmresponsecreation': text_data_row
     }
 
 
@@ -146,16 +173,33 @@ def exports_v2_by_data_type(expected_export_v2_image, expected_export_v2_audio,
                             expected_export_v2_video,
                             expected_export_v2_conversation,
                             expected_export_v2_dicom,
-                            expected_export_v2_document):
+                            expected_export_v2_document,
+                            expected_export_v2_llm_prompt_creation,
+                            expected_export_v2_llm_prompt_response_creation,
+                            expected_export_v2_llm_response_creation):
     return {
-        'image': expected_export_v2_image,
-        'audio': expected_export_v2_audio,
-        'html': expected_export_v2_html,
-        'text': expected_export_v2_text,
-        'video': expected_export_v2_video,
-        'conversation': expected_export_v2_conversation,
-        'dicom': expected_export_v2_dicom,
-        'document': expected_export_v2_document,
+        'image':
+            expected_export_v2_image,
+        'audio':
+            expected_export_v2_audio,
+        'html':
+            expected_export_v2_html,
+        'text':
+            expected_export_v2_text,
+        'video':
+            expected_export_v2_video,
+        'conversation':
+            expected_export_v2_conversation,
+        'dicom':
+            expected_export_v2_dicom,
+        'document':
+            expected_export_v2_document,
+        'llmpromptcreation':
+            expected_export_v2_llm_prompt_creation,
+        'llmpromptresponsecreation':
+            expected_export_v2_llm_prompt_response_creation,
+        'llmresponsecreation':
+            expected_export_v2_llm_response_creation
     }
 
 
@@ -179,7 +223,10 @@ def annotations_by_data_type(polygon_inference, rectangle_inference,
             checklist_inference, text_inference
         ],
         'text': [entity_inference, checklist_inference, text_inference],
-        'video': [video_checklist_inference]
+        'video': [video_checklist_inference],
+        'llmpromptcreation': [checklist_inference, text_inference],
+        'llmpromptresponsecreation': [checklist_inference, text_inference],
+        'llmresponsecreation': [checklist_inference, text_inference]
     }
 
 
@@ -207,7 +254,10 @@ def annotations_by_data_type_v2(
             checklist_inference, text_inference
         ],
         'text': [entity_inference, checklist_inference, text_inference],
-        'video': [video_checklist_inference]
+        'video': [video_checklist_inference],
+        'llmpromptcreation': [checklist_inference, text_inference],
+        'llmpromptresponsecreation': [checklist_inference, text_inference],
+        'llmresponsecreation': [checklist_inference, text_inference]
     }
 
 
diff --git a/tests/integration/annotation_import/fixtures/export_v2.py b/tests/integration/annotation_import/fixtures/export_v2.py
@@ -317,3 +317,66 @@ def expected_export_v2_document():
         'relationships': []
     }
     return expected_annotations
+
+
+@pytest.fixture()
+def expected_export_v2_llm_prompt_creation():
+    expected_annotations = {
+        'objects': [],
+        'classifications': [{
+            'name': 'checklist',
+            'checklist_answers': [{
+                'name': 'option1',
+                'classifications': []
+            }]
+        }, {
+            'name': 'text',
+            'text_answer': {
+                'content': 'free form text...'
+            }
+        }],
+        'relationships': []
+    }
+    return expected_annotations
+
+
+@pytest.fixture()
+def expected_export_v2_llm_prompt_response_creation():
+    expected_annotations = {
+        'objects': [],
+        'classifications': [{
+            'name': 'checklist',
+            'checklist_answers': [{
+                'name': 'option1',
+                'classifications': []
+            }]
+        }, {
+            'name': 'text',
+            'text_answer': {
+                'content': 'free form text...'
+            }
+        }],
+        'relationships': []
+    }
+    return expected_annotations
+
+
+@pytest.fixture()
+def expected_export_v2_llm_response_creation():
+    expected_annotations = {
+        'objects': [],
+        'classifications': [{
+            'name': 'checklist',
+            'checklist_answers': [{
+                'name': 'option1',
+                'classifications': []
+            }]
+        }, {
+            'name': 'text',
+            'text_answer': {
+                'content': 'free form text...'
+            }
+        }],
+        'relationships': []
+    }
+    return expected_annotations
diff --git a/tests/integration/annotation_import/test_data_types.py b/tests/integration/annotation_import/test_data_types.py
@@ -8,7 +8,7 @@
 from labelbox.schema.data_row import DataRow
 from labelbox.schema.media_type import MediaType
 import labelbox.types as lb_types
-from labelbox.data.annotation_types.data import AudioData, ConversationData, DicomData, DocumentData, HTMLData, ImageData, TextData
+from labelbox.data.annotation_types.data import AudioData, ConversationData, DicomData, DocumentData, HTMLData, ImageData, TextData, LlmPromptCreationData, LlmPromptResponseCreationData, LlmResponseCreationData
 from labelbox.data.serialization import NDJsonConverter
 from labelbox.schema.annotation_import import AnnotationImportState
 from utils import remove_keys_recursive, rename_cuid_key_recursive
@@ -134,7 +134,8 @@ def create_data_row_for_project(project, dataset, data_row_ndjson, batch_name):
 # TODO: Add VideoData. Currently label import job finishes without errors but project.export_labels() returns empty list.
 @pytest.mark.parametrize('data_type_class', [
     AudioData, ConversationData, DicomData, DocumentData, HTMLData, ImageData,
-    TextData
+    TextData, LlmPromptCreationData, LlmPromptResponseCreationData,
+    LlmResponseCreationData
 ])
 def test_import_data_types(
     client,
@@ -243,12 +244,19 @@ def set_project_media_type_from_data_type(project, data_type_class):
     media_type = to_pascal_case(data_type_string)
     if media_type == 'Conversation':
         media_type = 'Conversational'
+    elif media_type == 'Llmpromptcreation':
+        media_type = 'LLMPromptCreation'
+    elif media_type == 'Llmpromptresponsecreation':
+        media_type = 'LLMPromptResponseCreation'
+    elif media_type == 'Llmresponsecreation':
+        media_type = 'Text'
     project.update(media_type=MediaType[media_type])
 
 
 @pytest.mark.parametrize('data_type_class', [
     AudioData, HTMLData, ImageData, TextData, VideoData, ConversationData,
-    DocumentData, DicomData
+    DocumentData, DicomData, LlmPromptCreationData,
+    LlmPromptResponseCreationData, LlmResponseCreationData
 ])
 def test_import_data_types_v2(client, configured_project, initial_dataset,
                               data_row_json_by_data_type,