Skip to content

Commit 954a8f2

Browse files
author
PJEstrada
authored
Merge pull request #5 from diffgram/overwrite-flag
File Update Overwrite Flag
2 parents cccc75f + dc90d29 commit 954a8f2

File tree

14 files changed

+857
-394
lines changed

14 files changed

+857
-394
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
.vs/PythonSettings.json
55
.vs/VSWorkspaceState.json
66

7+
*.pyc
78
.idea/
89

910
sdk/diffgram/__pycache__/

sdk/diffgram/core/core.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,6 @@ def set_default_directory(self,
240240
self.directory_id = self.default_directory['id']
241241

242242
self.directory_list = data["directory_list"]
243-
244243
self.session.headers.update(
245244
{'directory_id': str(self.directory_id)})
246245

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
from PIL import Image, ImageDraw
2+
from imageio import imread
3+
import numpy as np
4+
5+
class DiffgramDatasetIterator:
    """
    Iterable / indexable view over a list of Diffgram file IDs.

    Each item is fetched from the project on access and converted into a
    ``sample`` dict holding the image data plus the box / polygon / label
    information extracted from the file's instance list.
    """

    def __init__(self, project, diffgram_file_id_list):
        """

        :param project (sdk.core.core.Project): A Project object from the Diffgram SDK
        :param diffgram_file_id_list (list): An arbitrary number of file ID's from Diffgram.
        :raises Exception: if the project reports that the ID list does not exist in it.
        """
        self.diffgram_file_id_list = diffgram_file_id_list

        self.project = project
        self._internal_file_list = []
        self.__validate_file_ids()
        self.current_file_index = 0

    def __iter__(self):
        # Restart from the first file every time a new iteration begins.
        self.current_file_index = 0
        return self

    def __len__(self):
        return len(self.diffgram_file_id_list)

    def __getitem__(self, idx):
        """
        Fetch the file at position ``idx`` of the ID list and build its sample.

        :param idx: index into ``diffgram_file_id_list``
        :return: sample dict, see ``get_file_instances``
        """
        diffgram_file = self.project.file.get_by_id(self.diffgram_file_id_list[idx], with_instances = True)
        return self.get_file_instances(diffgram_file)

    def __next__(self):
        """
        Return the sample for the next file ID.

        :raises StopIteration: once every file ID has been consumed.
        """
        # The iterator protocol requires StopIteration at exhaustion; indexing
        # past the end would surface as IndexError and break `for` loops.
        if self.current_file_index >= len(self.diffgram_file_id_list):
            raise StopIteration
        file_id = self.diffgram_file_id_list[self.current_file_index]
        diffgram_file = self.project.file.get_by_id(file_id, with_instances = True)
        instance_data = self.get_file_instances(diffgram_file)
        self.current_file_index += 1
        return instance_data

    def __validate_file_ids(self):
        """Ask the project whether the given file IDs exist; raise if it says no."""
        result = self.project.file.file_list_exists(self.diffgram_file_id_list)
        if not result:
            raise Exception(
                'Some file IDs do not belong to the project. Please provide only files from the same project.')

    def get_image_data(self, diffgram_file):
        """
        Read the image of a file from its signed URL.

        :param diffgram_file: file object; must expose an ``image`` attribute
        :return: whatever ``imread`` returns for ``image['url_signed']``
        :raises Exception: if the file has no ``image`` attribute.
        """
        if not hasattr(diffgram_file, 'image'):
            raise Exception('Pytorch datasets only support images. Please provide only file_ids from images')
        return imread(diffgram_file.image.get('url_signed'))

    def get_file_instances(self, diffgram_file):
        """
        Build the sample dict for one file.

        The sample always contains ``image``, ``diffgram_file``,
        ``label_id_list`` and ``label_name_list``. When the file has box
        instances it also contains ``x_min_list``, ``x_max_list``,
        ``y_min_list`` and ``y_max_list``; when it has polygon instances it
        also contains ``polygon_mask_list``.

        :param diffgram_file: file object fetched with its instances
        :return: dict as described above
        :raises NotImplementedError: for file types other than image / frame,
            or when boxes and polygons are mixed with further instance types.
        """
        if diffgram_file.type not in ['image', 'frame']:
            # The file is an object (see `.type` above), so read the attribute
            # instead of subscripting it.
            raise NotImplementedError('File type "{}" is not supported yet'.format(diffgram_file.type))

        image = self.get_image_data(diffgram_file)
        instance_list = diffgram_file.instance_list
        instance_types_in_file = {x['type'] for x in instance_list}
        # Process the instances of each file
        sample = {'image': image, 'diffgram_file': diffgram_file}
        has_boxes = False
        has_poly = False
        if 'box' in instance_types_in_file:
            has_boxes = True
            x_min_list, x_max_list, y_min_list, y_max_list = self.extract_bbox_values(instance_list, diffgram_file)
            sample['x_min_list'] = x_min_list
            sample['x_max_list'] = x_max_list
            sample['y_min_list'] = y_min_list
            sample['y_max_list'] = y_max_list

        if 'polygon' in instance_types_in_file:
            has_poly = True
            mask_list = self.extract_masks_from_polygon(instance_list, diffgram_file)
            sample['polygon_mask_list'] = mask_list

        # With both supported types present, a third distinct type can only be
        # an unsupported one.
        if len(instance_types_in_file) > 2 and has_boxes and has_poly:
            raise NotImplementedError(
                'SDK only supports boxes and polygon types currently. If you want a new instance type to be supported please contact us!'
            )

        label_id_list, label_name_list = self.extract_labels(instance_list)
        sample['label_id_list'] = label_id_list
        sample['label_name_list'] = label_name_list

        return sample

    def extract_masks_from_polygon(self, instance_list, diffgram_file, empty_value = 0):
        """
        Rasterise every polygon instance into its own mask.

        :param instance_list: list of instance dicts; non-polygon entries are skipped
        :param diffgram_file: file object whose ``image`` holds ``width`` and ``height``
        :param empty_value: currently unused; background pixels are always 0
        :return: list of float32 arrays, 1 inside / on the polygon and 0 elsewhere
        """
        nx, ny = diffgram_file.image['width'], diffgram_file.image['height']
        mask_list = []
        for instance in instance_list:
            if instance['type'] != 'polygon':
                continue
            poly = [(p['x'], p['y']) for p in instance['points']]

            img = Image.new(mode = 'L', size = (nx, ny), color = 0)  # mode L = 8-bit pixels, black and white
            draw = ImageDraw.Draw(img)
            draw.polygon(poly, outline = 1, fill = 1)
            mask = np.array(img).astype('float32')
            mask_list.append(mask)
        return mask_list

    def extract_labels(self, instance_list, allowed_instance_types = None):
        """
        Collect the label file ID and label name of each instance.

        :param instance_list: list of instance dicts
        :param allowed_instance_types: optional collection of instance types to
            keep; when empty or None every instance is kept
        :return: tuple ``(label_file_id_list, label_names_list)``
        """
        label_file_id_list = []
        label_names_list = []

        for inst in instance_list:
            # Skip instances whose type is NOT in the allowed set.
            if allowed_instance_types and inst['type'] not in allowed_instance_types:
                continue

            label_file_id_list.append(inst['label_file']['id'])
            label_names_list.append(inst['label_file']['label']['name'])

        return label_file_id_list, label_names_list

    def extract_bbox_values(self, instance_list, diffgram_file):
        """
        Collect the box coordinates of a file, normalised by the image size.

        x values are divided by the image width and y values by the image
        height. Non-box instances are skipped.

        :param instance_list: list of instance dicts
        :param diffgram_file: file object whose ``image`` holds ``width`` and ``height``
        :return: tuple ``(x_min_list, x_max_list, y_min_list, y_max_list)``
        """
        width = diffgram_file.image['width']
        height = diffgram_file.image['height']
        x_min_list = []
        x_max_list = []
        y_min_list = []
        y_max_list = []

        for inst in instance_list:
            if inst['type'] != 'box':
                continue
            x_min_list.append(inst['x_min'] / width)
            x_max_list.append(inst['x_max'] / width)
            # Vertical coordinates are relative to the height, not the width.
            y_min_list.append(inst['y_min'] / height)
            y_max_list.append(inst['y_max'] / height)

        return x_min_list, x_max_list, y_min_list, y_max_list

sdk/diffgram/core/directory.py

Lines changed: 94 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
from diffgram.file.file import File
22
from ..regular.regular import refresh_from_dict
33
import logging
4+
from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset
5+
from diffgram.tensorflow_diffgram.diffgram_tensorflow_dataset import DiffgramTensorflowDataset
6+
from diffgram.core.diffgram_dataset_iterator import DiffgramDatasetIterator
47

58

69
def get_directory_list(self):
@@ -71,14 +74,80 @@ def set_directory_by_name(self, name):
7174
str(names_attempted))
7275

7376

74-
class Directory():
77+
class Directory(DiffgramDatasetIterator):
7578

76-
def __init__(self,
77-
client):
79+
def __init__(self, client, file_id_list_sliced = None):
    """
    Set up the directory and the dataset iterator it extends.

    :param client: stored as ``self.client`` and handed to the iterator base
        class as its project
    :param file_id_list_sliced: optional ready-made list of file IDs; when
        omitted the IDs are fetched through ``all_file_ids()``
    """
    self.client = client
    self.id = None
    self.file_list_metadata = {}

    # A caller-supplied list is used as-is; only fetch when none was given.
    if file_id_list_sliced is not None:
        self.file_id_list = file_id_list_sliced
    else:
        self.file_id_list = self.all_file_ids()

    super(Directory, self).__init__(self.client, self.file_id_list)
90+
91+
def all_files(self):
    """
    Get all the files of the directory.
    Warning! This can be an expensive function and take a long time.

    Pages are requested 1000 files at a time with ``file_view_mode = 'base'``
    until the metadata stored by ``list_files`` reports no next page.

    :return: list with the entries of every page, in page order
    """
    page_num = 1
    result = []
    while page_num is not None:
        diffgram_files = self.list_files(limit = 1000, page_num = page_num, file_view_mode = 'base')
        # list_files refreshes self.file_list_metadata on each call.
        page_num = self.file_list_metadata['next_page']
        # Extend in place: `result = result + page` re-copies everything
        # collected so far on every page.
        result.extend(diffgram_files)
    return result
104+
105+
def all_file_ids(self):
    """
    Get the IDs of all the files of the directory.

    Pages are requested 1000 entries at a time with
    ``file_view_mode = 'ids_only'`` until the metadata stored by
    ``list_files`` reports no next page.

    :return: list with the entries of every page, in page order
    """
    page_num = 1
    result = []
    while page_num is not None:
        diffgram_ids = self.list_files(limit = 1000, page_num = page_num, file_view_mode = 'ids_only')
        # list_files refreshes self.file_list_metadata on each call.
        page_num = self.file_list_metadata['next_page']
        # Extend in place: `result = result + page` re-copies everything
        # collected so far on every page.
        result.extend(diffgram_ids)
    return result
113+
114+
def slice(self, query):
    """
    Create a SlicedDirectory restricted to the files matching ``query``.

    :param query: query string forwarded to ``list_files`` and to the
        SlicedDirectory
    :return: SlicedDirectory built from this directory's client and ``query``
    """
    # Imported locally; presumably to avoid a circular import, since the
    # sliced directory module itself imports Directory.
    from diffgram.core.sliced_directory import SlicedDirectory

    # Get the first page to validate syntax.
    self.list_files(limit = 25, page_num = 1, file_view_mode = 'ids_only', query = query)

    return SlicedDirectory(client = self.client, query = query, original_directory = self)
129+
130+
def to_pytorch(self, transform = None):
    """
    Transforms the file list inside the dataset into a pytorch dataset.

    The file IDs are fetched again through ``all_file_ids()`` on every call.

    :param transform: forwarded unchanged to DiffgramPytorchDataset
    :return: DiffgramPytorchDataset over all the file IDs of the directory
    """
    return DiffgramPytorchDataset(
        project = self.client,
        diffgram_file_id_list = self.all_file_ids(),
        transform = transform
    )
143+
144+
def to_tensorflow(self):
    """
    Transforms the file list inside the dataset into a tensorflow dataset wrapper.

    The file IDs are fetched again through ``all_file_ids()`` on every call.

    :return: DiffgramTensorflowDataset over all the file IDs of the directory
    """
    return DiffgramTensorflowDataset(
        project = self.client,
        diffgram_file_id_list = self.all_file_ids()
    )
82151

83152
def new(self, name: str):
84153
"""
@@ -131,9 +200,12 @@ def new(self, name: str):
131200

132201

133202
def list_files(
134-
self,
135-
limit=None,
136-
search_term: str =None):
203+
self,
204+
page_num=1,
205+
limit=100,
206+
search_term: str =None,
207+
file_view_mode: str = 'annotation',
208+
query: str = None):
137209
"""
138210
Get a list of files in directory (from Diffgram service).
139211
@@ -158,7 +230,6 @@ def list_files(
158230
else:
159231
logging.info("Using Default Dataset ID " + str(self.client.directory_id))
160232
directory_id = self.client.directory_id
161-
#print("directory_id", directory_id)
162233

163234
metadata = {'metadata' :
164235
{
@@ -167,10 +238,10 @@ def list_files(
167238
'annotation_status': "All",
168239
'limit': limit,
169240
'media_type': "All",
170-
'request_next_page': False,
171-
'request_previous_page': False,
172-
'file_view_mode': "annotation",
173-
'search_term': search_term
241+
'page': page_num,
242+
'file_view_mode': file_view_mode,
243+
'search_term': search_term,
244+
'query': query
174245
}
175246
}
176247

@@ -190,17 +261,20 @@ def list_files(
190261
# Success
191262
data = response.json()
192263
file_list_json = data.get('file_list')
193-
264+
self.file_list_metadata = data.get('metadata')
194265
# TODO would like this to perhaps be a seperate function
195266
# ie part of File_Constructor perhaps
196-
file_list = []
197-
for file_json in file_list_json:
198-
file = File.new(
199-
client = self.client,
200-
file_json = file_json)
201-
file_list.append(file)
202-
203-
return file_list
267+
if file_view_mode == 'ids_only':
268+
return file_list_json
269+
else:
270+
file_list = []
271+
for file_json in file_list_json:
272+
file = File.new(
273+
client = self.client,
274+
file_json = file_json)
275+
file_list.append(file)
276+
277+
return file_list
204278

205279

206280
def get(self,
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
from diffgram.core.directory import Directory
2+
from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset
3+
from diffgram.tensorflow_diffgram.diffgram_tensorflow_dataset import DiffgramTensorflowDataset
4+
5+
6+
class SlicedDirectory(Directory):
    """
    In-memory view of a Directory restricted to the files matching a query.

    The matching file IDs are fetched once at construction time and cached in
    ``file_id_list``; the dataset conversions below are built from that list.
    """

    def __init__(self, client, original_directory: Directory, query: str):
        """
        :param client: stored as ``self.client`` and handed to the iterator
            base class as its project
        :param original_directory: the Directory this slice was created from
        :param query: query string sent with every ``list_files`` request
        """
        self.original_directory = original_directory
        self.query = query
        self.client = client
        # Share the same ID from the original directory as this is just an in-memory construct for better semantics.
        self.id = original_directory.id
        # Directory.__init__ is bypassed below, so provide the attribute it
        # would otherwise have initialised.
        self.file_list_metadata = {}
        self.file_id_list = self.all_file_ids()
        # super(Directory, self) starts the lookup *after* Directory, so
        # Directory.__init__ (which resets self.id to None and fetches the
        # unfiltered ID list) does not run.
        super(Directory, self).__init__(self.client, self.file_id_list)

    def all_file_ids(self):
        """
        Fetch the IDs of every file matching ``self.query``.

        Pages are requested 1000 entries at a time with
        ``file_view_mode = 'ids_only'`` until the metadata stored by
        ``list_files`` reports no next page.

        :return: list with the entries of every page, in page order
        """
        page_num = 1
        result = []
        while page_num is not None:
            diffgram_ids = self.list_files(limit = 1000,
                                           page_num = page_num,
                                           file_view_mode = 'ids_only',
                                           query = self.query)
            page_num = self.file_list_metadata['next_page']
            # Extend in place: `result = result + page` re-copies everything
            # collected so far on every page.
            result.extend(diffgram_ids)
        return result

    def to_pytorch(self, transform = None):
        """
        Transforms the file list inside the dataset into a pytorch dataset.

        :param transform: forwarded unchanged to DiffgramPytorchDataset
        :return: DiffgramPytorchDataset over the cached ``file_id_list``
        """
        pytorch_dataset = DiffgramPytorchDataset(
            project = self.client,
            diffgram_file_id_list = self.file_id_list,
            transform = transform
        )
        return pytorch_dataset

    def to_tensorflow(self):
        """
        Transforms the file list inside the dataset into a tensorflow dataset wrapper.

        Uses the cached ``file_id_list``, like ``to_pytorch``, instead of
        paging through the whole query again.

        :return: DiffgramTensorflowDataset over the cached ``file_id_list``
        """
        diffgram_tensorflow_dataset = DiffgramTensorflowDataset(
            project = self.client,
            diffgram_file_id_list = self.file_id_list
        )
        return diffgram_tensorflow_dataset

sdk/diffgram/file/file.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from ..regular.regular import refresh_from_dict
22

3-
43
class File():
54
"""
65
file literal object
@@ -11,11 +10,12 @@ class File():
1110

1211
def __init__(
1312
self,
14-
id=None,
15-
client=None):
13+
id = None,
14+
client = None):
1615
self.id = id
1716
self.client = client
1817

18+
@staticmethod
1919
def new(
2020
client,
2121
file_json):
@@ -62,7 +62,8 @@ def update(
6262
packet['instance_list'] = instance_list
6363

6464
# Current default server side is to not overwrite
65-
# packet['overwrite'] = overwrite
65+
if overwrite:
66+
packet['mode'] = "update_with_existing"
6667

6768
self.client.file.from_packet(packet=packet)
6869

0 commit comments

Comments
 (0)