Skip to content

Commit 294f14d

Browse files
committed
addition of new test case for pdf documents
1 parent 392806c commit 294f14d

File tree

4 files changed

+297
-0
lines changed

4 files changed

+297
-0
lines changed

labelbox/data/serialization/labelbox_v1/label.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ class LBV1Label(BaseModel):
146146
skipped: Optional[bool] = Extra('Skipped')
147147
media_type: Optional[str] = Extra('media_type')
148148
data_split: Optional[str] = Extra('Data Split')
149+
global_key: Optional[str] = Extra('Global Key')
149150

150151
def to_common(self) -> Label:
151152
if isinstance(self.label, list):

test.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
import base64
2+
import time
3+
import os
4+
from datetime import datetime, timezone
5+
import uuid
6+
import json
7+
import logging
8+
# import cuid
9+
import random
10+
from pprint import pprint
11+
12+
import requests
13+
14+
from labelbox import Client, Project, DataRow, Label
15+
from labelbox.data.serialization.labelbox_v1 import LBV1Converter
16+
from labelbox.schema.annotation_import import LabelImport, MALPredictionImport
17+
from labelbox.schema.labeling_frontend import LabelingFrontend
18+
from labelbox.schema.ontology import Classification, OntologyBuilder, Ontology, Tool, Option
19+
20+
# logging.basicConfig(level=logging.DEBUG)
21+
22+
# ____________________________________________________________________________________
23+
"""HELPER FUNCTIONS"""
24+
25+
26+
def cleanup_my_org():
    """Delete recently created projects and datasets from the organization.

    Walks the projects and then the datasets returned by the module-level
    ``client`` and deletes each one whose ``created_at`` is later than
    2022-07-15 00:00 UTC, printing its name first.

    WARNING: this is destructive; deleted items are removed via the API.
    """
    from datetime import datetime, timezone

    # Anything created after this instant is treated as disposable.
    cutoff = datetime.strptime("2022-07-15",
                               "%Y-%m-%d").replace(tzinfo=timezone.utc)

    # Projects are handled first, then datasets; both get the same treatment.
    for list_items in (client.get_projects, client.get_datasets):
        for item in list_items():
            if item.created_at > cutoff:
                print(item.name)
                item.delete()
39+
# for model in client.get_models():
40+
# model.delete()
41+
42+
43+
def get_lb_client(environment: str = "prod"):
    """Build a Labelbox ``Client`` for the requested environment.

    API keys are read from environment variables so that no credential is
    stored in source control:

    * ``"prod"``    -> ``apikey`` (default endpoint)
    * ``"staging"`` -> ``LABELBOX_STAGING_API_KEY``
    * ``"local"``   -> ``LABELBOX_LOCAL_API_KEY``

    NOTE(review): earlier revisions of this file embedded the staging and
    local keys as string literals; those keys should be revoked.

    Args:
        environment: One of ``"prod"``, ``"staging"`` or ``"local"``.

    Returns:
        A ``Client`` configured for that environment.

    Raises:
        SystemExit: With code 1 (after printing a message) when
            ``environment`` is not recognised.
        RuntimeError: When the staging/local key variable is unset or empty.
    """
    # environment -> (variable holding the API key, endpoint override or None)
    settings = {
        "prod": ("apikey", None),
        "staging": ("LABELBOX_STAGING_API_KEY",
                    "https://api.lb-stage.xyz/graphql"),
        "local": ("LABELBOX_LOCAL_API_KEY", "http://localhost:8080/graphql"),
    }

    if environment not in settings:
        print("Invalid environment")
        # Same effect as the interactive ``exit(1)`` helper, but does not
        # depend on the ``site`` module having injected it.
        raise SystemExit(1)

    key_variable, endpoint = settings[environment]
    api_key = os.environ.get(key_variable)

    if endpoint is None:
        # Production keeps its original behaviour: the key (possibly None)
        # is handed straight to the client.
        return Client(api_key)

    if not api_key:
        raise RuntimeError(
            f"Set the {key_variable} environment variable to use the "
            f"'{environment}' environment")
    return Client(api_key, endpoint=endpoint)
57+
58+
59+
# ____________________________________________________________________________________
60+
61+
# Start from an empty terminal so this run's output is easy to read.
os.system('clear')

# Shared client used by everything below (and by cleanup_my_org).
client = get_lb_client("prod")

# ____________________________________________________________________________________

# Project the rest of this script works against.
project_id = "cl6xntneb7t28072bggdydv7a"
project = client.get_project(project_id)
68+
# organization_id = client.get_organization().uid
69+
# dataset = client.create_dataset(name="hello world")
70+
# dataset = client.get_dataset("cl6xy8bcw0g7v07068u9t5hiv")
71+
# print(dataset.uid)
72+
# file_path = "/Users/jonathantso/Downloads/sample_batch.txt"
73+
# dataset.create_data_rows(file_path)
74+
# ____________________________________________________________________________________
75+
# annotations = []
76+
77+
# rows = list(project.batches())[0].export_data_rows()
78+
79+
# for row in project.export_queued_data_rows(include_metadata=True):
80+
# print(f"row: {row['id']}, {row['externalId']}")
81+
# for i in range(4):
82+
# annotations.append({
83+
# "uuid": str(uuid.uuid4()),
84+
# "name": "boxy",
85+
# "dataRow": {"id": row['id']},
86+
# "bbox": {"top": round(random.uniform(0,300),2), "left": round(random.uniform(0,300),2), "height": round(random.uniform(200,500),2), "width": round(random.uniform(0,200),2)},
87+
# "unit": "POINTS",
88+
# "page": random.randint(0,9)
89+
# })
90+
91+
# import_annotations = MALPredictionImport.create_from_objects(client=client, project_id = project.uid, name=f"import {str(uuid.uuid4())}", predictions=annotations)
92+
# import_annotations.wait_until_done()
93+
94+
# print(f"\nerrors: {import_annotations.errors}")
95+
# Expected result: import_annotations.errors == [] (this should become an assert).
96+
#____________________________________________________________________________________
97+
# labels = project.label_generator()
98+
# Round trip: exported JSON -> common label objects -> LBV1 JSON -> common
# label objects again, then print every annotation that survived.
export_path = "/Users/jonathantso/Downloads/export-2022-08-17T18_37_30.233Z.json"
with open(export_path, "r") as export_file:
    raw_export = json.load(export_file)

print("\nnow deserializing..\n")
first_pass = LBV1Converter.deserialize(raw_export)

print("\nnow serializing..\n")
reserialized = LBV1Converter.serialize(first_pass)

# Second deserialization checks that the serializer's output is itself
# readable by the converter.
labels = LBV1Converter.deserialize(reserialized)
for label in labels:
    if not label.annotations:
        continue
    for obj in label.annotations:
        print(f"\n\t{obj}")
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
[{
2+
"ID": "cl6xnzi4a7ldn0729381g7104",
3+
"DataRow ID": "cl6xnv9h61fv0085yhtoq06ht",
4+
"Labeled Data": "https://storage.labelbox.com/ckcz6bubudyfi0855o1dt1g9s%2F4cef4e08-e13d-8a5e-fbbf-c7624babb490-Airbnb_%20Labelbox%20-%20Focus%20on%20Workforce%20-%20Labelbox%20Labeling%20Operations%20(1).pdf?Expires=1661971050348&KeyName=labelbox-assets-key-3&Signature=JK6ral5CXF7T9Q5LaQqKvJy5A2A",
5+
"Label": {
6+
"objects": [{
7+
"featureId": "cl6xnzjpq0dmr07yocs2vfot8",
8+
"schemaId": "cl6xnuwt95lqq07330tbb3mfd",
9+
"color": "#1CE6FF",
10+
"title": "boxy",
11+
"value": "boxy",
12+
"bbox": {
13+
"top": 144.68,
14+
"left": 107.84,
15+
"height": 441.6,
16+
"width": 9.48
17+
},
18+
"page": 0,
19+
"unit": "POINTS",
20+
"instanceURI": "https://api.labelbox.com/masks/feature/cl6xnzjpq0dmr07yocs2vfot8?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2NjOWZtbXc0aGNkMDczOHFpeWM2YW54Iiwib3JnYW5pemF0aW9uSWQiOiJja2N6NmJ1YnVkeWZpMDg1NW8xZHQxZzlzIiwiaWF0IjoxNjYwNzYxNDUwLCJleHAiOjE2NjMzNTM0NTB9.X4-j6zee8o685PUrL9C6oC2m6TayKuJQHhN8iLgG8kI"
21+
}, {
22+
"featureId": "cl6xnzjpq0dms07yobwv68gxf",
23+
"schemaId": "cl6xnuwt95lqq07330tbb3mfd",
24+
"color": "#1CE6FF",
25+
"title": "boxy",
26+
"value": "boxy",
27+
"bbox": {
28+
"top": 162.73,
29+
"left": 32.45,
30+
"height": 388.17,
31+
"width": 101.66
32+
},
33+
"page": 4,
34+
"unit": "POINTS",
35+
"instanceURI": "https://api.labelbox.com/masks/feature/cl6xnzjpq0dms07yobwv68gxf?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2NjOWZtbXc0aGNkMDczOHFpeWM2YW54Iiwib3JnYW5pemF0aW9uSWQiOiJja2N6NmJ1YnVkeWZpMDg1NW8xZHQxZzlzIiwiaWF0IjoxNjYwNzYxNDUwLCJleHAiOjE2NjMzNTM0NTB9.X4-j6zee8o685PUrL9C6oC2m6TayKuJQHhN8iLgG8kI"
36+
}, {
37+
"featureId": "cl6xnzjpq0dmt07yo8pp45gru",
38+
"schemaId": "cl6xnuwt95lqq07330tbb3mfd",
39+
"color": "#1CE6FF",
40+
"title": "boxy",
41+
"value": "boxy",
42+
"bbox": {
43+
"top": 223.26,
44+
"left": 251.42,
45+
"height": 457.04,
46+
"width": 186.78
47+
},
48+
"page": 7,
49+
"unit": "POINTS",
50+
"instanceURI": "https://api.labelbox.com/masks/feature/cl6xnzjpq0dmt07yo8pp45gru?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2NjOWZtbXc0aGNkMDczOHFpeWM2YW54Iiwib3JnYW5pemF0aW9uSWQiOiJja2N6NmJ1YnVkeWZpMDg1NW8xZHQxZzlzIiwiaWF0IjoxNjYwNzYxNDUwLCJleHAiOjE2NjMzNTM0NTB9.X4-j6zee8o685PUrL9C6oC2m6TayKuJQHhN8iLgG8kI"
51+
}, {
52+
"featureId": "cl6xnzjpq0dmu07yo2qik0en4",
53+
"schemaId": "cl6xnuwt95lqq07330tbb3mfd",
54+
"color": "#1CE6FF",
55+
"title": "boxy",
56+
"value": "boxy",
57+
"bbox": {
58+
"top": 32.52,
59+
"left": 218.17,
60+
"height": 231.73,
61+
"width": 110.56
62+
},
63+
"page": 6,
64+
"unit": "POINTS",
65+
"instanceURI": "https://api.labelbox.com/masks/feature/cl6xnzjpq0dmu07yo2qik0en4?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2NjOWZtbXc0aGNkMDczOHFpeWM2YW54Iiwib3JnYW5pemF0aW9uSWQiOiJja2N6NmJ1YnVkeWZpMDg1NW8xZHQxZzlzIiwiaWF0IjoxNjYwNzYxNDUwLCJleHAiOjE2NjMzNTM0NTB9.X4-j6zee8o685PUrL9C6oC2m6TayKuJQHhN8iLgG8kI"
66+
}, {
67+
"featureId": "cl6xnzjpq0dmv07yo7phz7ofz",
68+
"schemaId": "cl6xnuwt95lqq07330tbb3mfd",
69+
"color": "#1CE6FF",
70+
"title": "boxy",
71+
"value": "boxy",
72+
"bbox": {
73+
"top": 117.39,
74+
"left": 4.25,
75+
"height": 456.92,
76+
"width": 164.83
77+
},
78+
"page": 7,
79+
"unit": "POINTS",
80+
"instanceURI": "https://api.labelbox.com/masks/feature/cl6xnzjpq0dmv07yo7phz7ofz?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2NjOWZtbXc0aGNkMDczOHFpeWM2YW54Iiwib3JnYW5pemF0aW9uSWQiOiJja2N6NmJ1YnVkeWZpMDg1NW8xZHQxZzlzIiwiaWF0IjoxNjYwNzYxNDUwLCJleHAiOjE2NjMzNTM0NTB9.X4-j6zee8o685PUrL9C6oC2m6TayKuJQHhN8iLgG8kI"
81+
}, {
82+
"featureId": "cl6xnzjpq0dmw07yofocp6uf6",
83+
"schemaId": "cl6xnuwt95lqq07330tbb3mfd",
84+
"color": "#1CE6FF",
85+
"title": "boxy",
86+
"value": "boxy",
87+
"bbox": {
88+
"top": 82.13,
89+
"left": 217.28,
90+
"height": 279.76,
91+
"width": 82.43
92+
},
93+
"page": 8,
94+
"unit": "POINTS",
95+
"instanceURI": "https://api.labelbox.com/masks/feature/cl6xnzjpq0dmw07yofocp6uf6?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2NjOWZtbXc0aGNkMDczOHFpeWM2YW54Iiwib3JnYW5pemF0aW9uSWQiOiJja2N6NmJ1YnVkeWZpMDg1NW8xZHQxZzlzIiwiaWF0IjoxNjYwNzYxNDUwLCJleHAiOjE2NjMzNTM0NTB9.X4-j6zee8o685PUrL9C6oC2m6TayKuJQHhN8iLgG8kI"
96+
}, {
97+
"featureId": "cl6xnzjpq0dmx07yo0qh40z0n",
98+
"schemaId": "cl6xnuwt95lqq07330tbb3mfd",
99+
"color": "#1CE6FF",
100+
"title": "boxy",
101+
"value": "boxy",
102+
"bbox": {
103+
"top": 298.12,
104+
"left": 83.34,
105+
"height": 203.83,
106+
"width": 0.38
107+
},
108+
"page": 3,
109+
"unit": "POINTS",
110+
"instanceURI": "https://api.labelbox.com/masks/feature/cl6xnzjpq0dmx07yo0qh40z0n?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2NjOWZtbXc0aGNkMDczOHFpeWM2YW54Iiwib3JnYW5pemF0aW9uSWQiOiJja2N6NmJ1YnVkeWZpMDg1NW8xZHQxZzlzIiwiaWF0IjoxNjYwNzYxNDUwLCJleHAiOjE2NjMzNTM0NTB9.X4-j6zee8o685PUrL9C6oC2m6TayKuJQHhN8iLgG8kI"
111+
}],
112+
"classifications": [],
113+
"relationships": []
114+
},
115+
"Created By": "jtso@labelbox.com",
116+
"Project Name": "PDF MAL Test",
117+
"Created At": "2022-08-17T18:37:18.000Z",
118+
"Updated At": "2022-08-17T18:37:20.073Z",
119+
"Seconds to Label": 15.003,
120+
"External ID": "Airbnb_ Labelbox - Focus on Workforce - Labelbox Labeling Operations (1).pdf",
121+
"Global Key": null,
122+
"Agreement": -1,
123+
"Benchmark Agreement": -1,
124+
"Benchmark ID": null,
125+
"Dataset Name": "PDF ",
126+
"Reviews": [],
127+
"View Label": "https://editor.labelbox.com?project=cl6xntneb7t28072bggdydv7a&label=cl6xnzi4a7ldn0729381g7104",
128+
"Has Open Issues": 0,
129+
"Skipped": false
130+
}]
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import json
2+
from typing import Dict, Any
3+
4+
from labelbox.data.serialization.labelbox_v1.converter import LBV1Converter
5+
6+
# Keys removed from the serialized label before it is compared against the
# export payload in test_pdf (presumably because the fixture does not
# contain them -- confirm against the fixture file).
IGNORE_KEYS = [
    "Data Split",
    "media_type",
    "DataRow Metadata",
    "Media Attributes",
]
9+
10+
11+
def round_dict(data: Dict[str, Any]) -> Dict[str, Any]:
    """Convert every float in ``data`` to an int, recursing into nested dicts.

    Despite the name, values are converted with ``int()``, so they are
    truncated toward zero rather than rounded to the nearest integer.
    Values of any other type, including lists, are left untouched.

    Args:
        data: Dictionary to normalise. It is modified in place.

    Returns:
        The same dictionary object that was passed in.
    """
    # Only values of existing keys are reassigned, so mutating while
    # iterating is safe here.
    for key, value in data.items():
        if isinstance(value, float):
            data[key] = int(value)
        elif isinstance(value, dict):
            data[key] = round_dict(value)
    return data
22+
23+
24+
def test_pdf():
    """
    Tests an export from a pdf document with only bounding boxes

    The export is deserialized, serialized again, and the first serialized
    label is compared key by key with the original payload. Keys listed in
    IGNORE_KEYS are dropped from the serialized side first. For 'Label',
    only the 'objects' entries are compared, after float values on both
    sides have been truncated with round_dict.
    """
    # Context manager so the fixture's file handle is always closed.
    with open('tests/data/assets/labelbox_v1/pdf_export.json', 'r') as file:
        payload = json.load(file)
    collection = LBV1Converter.deserialize(payload)
    serialized = next(LBV1Converter.serialize(collection))

    payload = payload[0]  # only one document in the export

    serialized = {k: v for k, v in serialized.items() if k not in IGNORE_KEYS}

    assert serialized.keys() == payload.keys()
    for key in payload:
        if key == 'Label':
            # Per-object 'classifications' are stripped from the serialized
            # objects before the comparison.
            serialized_no_classes = [{
                k: v for k, v in dic.items() if k != 'classifications'
            } for dic in serialized[key]['objects']]
            serialized_round = [
                round_dict(dic) for dic in serialized_no_classes
            ]
            payload_round = [round_dict(dic) for dic in payload[key]['objects']]
            assert payload_round == serialized_round
        else:
            assert serialized[key] == payload[key]

0 commit comments

Comments
 (0)