Skip to content

Commit d53098b

Browse files
author
Val Brodsky
committed
Update tests
Add DocumentEntity to tests. Add integration test.
1 parent 53381d0 commit d53098b

File tree

13 files changed

+771
-664
lines changed

13 files changed

+771
-664
lines changed

examples/annotation_import/pdf.ipynb

Lines changed: 662 additions & 654 deletions
Large diffs are not rendered by default.

labelbox/data/annotation_types/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
from .annotation import VideoObjectAnnotation
1212

1313
from .ner import TextEntity
14+
from .ner import DocumentEntity
15+
from .ner import DocumentTextSelection
1416

1517
from .classification import Checklist
1618
from .classification import ClassificationAnswer

labelbox/data/annotation_types/annotation.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from .classification import Checklist, Dropdown, Radio, Text
77
from .feature import FeatureSchema
88
from .geometry import Geometry, Rectangle, Point
9-
from .ner import TextEntity
9+
from .ner import DocumentEntity, TextEntity
1010

1111

1212
class BaseAnnotation(FeatureSchema, abc.ABC):
@@ -51,7 +51,7 @@ class ObjectAnnotation(BaseAnnotation, ConfidenceMixin):
5151
classifications (Optional[List[ClassificationAnnotation]]): Optional sub classification of the annotation
5252
extra (Dict[str, Any])
5353
"""
54-
value: Union[TextEntity, Geometry]
54+
value: Union[TextEntity, DocumentEntity, Geometry]
5555
classifications: List[ClassificationAnnotation] = []
5656

5757

labelbox/data/annotation_types/label.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import labelbox
88
from labelbox.data.annotation_types.data.tiled_image import TiledImageData
9+
from labelbox.data.annotation_types.ner import DocumentEntity
910
from labelbox.schema import ontology
1011
from .annotation import (ClassificationAnnotation, ObjectAnnotation,
1112
VideoClassificationAnnotation, VideoObjectAnnotation)

labelbox/data/annotation_types/ner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,4 @@ def validate_page(cls, v):
3333
class DocumentEntity(BaseModel):
3434
""" Represents a text entity """
3535
name: str
36-
text_selections: List[DocumentTextSelection]
36+
textSelections: List[DocumentTextSelection]

labelbox/data/serialization/ndjson/converter.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def serialize(
4040
Returns:
4141
A generator for accessing the ndjson representation of the data
4242
"""
43+
4344
for example in NDLabel.from_common(labels):
4445
res = example.dict(by_alias=True)
4546
for k, v in list(res.items()):

labelbox/data/serialization/ndjson/objects.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -376,12 +376,15 @@ def from_common(cls,
376376

377377
class NDDocumentEntity(NDBaseObject, ConfidenceMixin):
378378
name: str
379-
text_selections: List[DocumentTextSelection]
379+
textSelections: List[DocumentTextSelection]
380380

381381

382382
def to_common(self) -> DocumentEntity:
383-
return TextEntity(name=self.name, text_selections=self.text_selections)
383+
return TextEntity(name=self.name, textSelections=self.textSelections)
384384

385+
return obj.from_common(annotation.value, subclasses, annotation.name,
386+
annotation.feature_schema_id, annotation.extra,
387+
data, **optional_kwargs)
385388
@classmethod
386389
def from_common(cls,
387390
document_entity: DocumentEntity,
@@ -391,7 +394,8 @@ def from_common(cls,
391394
extra: Dict[str, Any],
392395
data: Union[ImageData, TextData],
393396
confidence: Optional[float] = None) -> "NDDocumentEntity":
394-
return cls(text_selections=document_entity.text_selections,
397+
398+
return cls(textSelections=document_entity.textSelections,
395399
dataRow=DataRow(id=data.uid),
396400
name=name,
397401
schema_id=feature_schema_id,
@@ -446,7 +450,8 @@ def from_common(
446450
optional_kwargs = {}
447451
if (annotation.confidence):
448452
optional_kwargs['confidence'] = annotation.confidence
449-
return obj.from_common(annotation.value, subclasses, annotation.name,
453+
name = annotation.name or annotation.value.name
454+
return obj.from_common(annotation.value, subclasses, name,
450455
annotation.feature_schema_id, annotation.extra,
451456
data, **optional_kwargs)
452457

tests/assets/arxiv-pdf_data_99-word-token-pdfs_0801.3483-lb-textlayer.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.
332 KB
Binary file not shown.
Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from labelbox.data.annotation_types import TextEntity
1+
from labelbox.data.annotation_types import TextEntity, DocumentEntity, DocumentTextSelection
22

33

44
def test_ner():
@@ -7,3 +7,11 @@ def test_ner():
77
text_entity = TextEntity(start=start, end=end)
88
assert text_entity.start == start
99
assert text_entity.end == end
10+
11+
def test_document_entity():
12+
document_entity = DocumentEntity(name="tool_name", textSelections=[DocumentTextSelection(tokenIds=["1", "2"], groupId="1", page=1)])
13+
14+
assert document_entity.name == "tool_name"
15+
assert document_entity.textSelections[0].tokenIds == ["1", "2"]
16+
assert document_entity.textSelections[0].groupId == "1"
17+
assert document_entity.textSelections[0].page == 1

0 commit comments

Comments
 (0)