|
| 1 | +from labelbox import utils |
1 | 2 | import os |
2 | 3 | import json |
3 | 4 | import logging |
@@ -81,13 +82,17 @@ def create_data_rows(self, items): |
81 | 82 | is uploaded to Labelbox and a DataRow referencing it is created. |
82 | 83 |
|
83 | 84 | If an item is a `dict`, then it could support one of the two following structures |
84 | | - 1. For static imagery, video, and text it should map `DataRow` fields (or their names) to values. |
85 | | - At the minimum an `item` passed as a `dict` must contain a `DataRow.row_data` key and value. |
| 85 | + 1. For static imagery, video, and text it should map `DataRow` field names to values. |
| 86 | + At the minimum an `item` passed as a `dict` must contain a `row_data` key and value. |
| 87 | + If the value for row_data is a local file path and the path exists, |
| 88 | + then the local file will be uploaded to Labelbox. |
| 89 | +
|
86 | 90 | 2. For tiled imagery the dict must match the import structure specified in the link below |
87 | 91 | https://docs.labelbox.com/data-model/en/index-en#tiled-imagery-import |
88 | 92 |
|
89 | 93 | >>> dataset.create_data_rows([ |
90 | 94 | >>> {DataRow.row_data:"http://my_site.com/photos/img_01.jpg"}, |
| 95 | + >>> {DataRow.row_data:"/path/to/file1.jpg"}, |
91 | 96 | >>> "path/to/file2.jpg", |
92 | 97 | >>> {"tileLayerUrl" : "http://", ...} |
93 | 98 | >>> ]) |
@@ -115,64 +120,105 @@ def create_data_rows(self, items): |
115 | 120 | DataRow = Entity.DataRow |
116 | 121 |
|
117 | 122 | def upload_if_necessary(item): |
118 | | - if isinstance(item, str): |
119 | | - item_url = self.client.upload_file(item) |
120 | | - # Convert item from str into a dict so it gets processed |
121 | | - # like all other dicts. |
122 | | - item = {DataRow.row_data: item_url, DataRow.external_id: item} |
| 123 | + row_data = item['row_data'] |
| 124 | + if os.path.exists(row_data): |
| 125 | + item_url = self.client.upload_file(item['row_data']) |
| 126 | + item = { |
| 127 | + "row_data": item_url, |
| 128 | + "external_id": item.get('external_id', item['row_data']), |
| 129 | + "attachments": item.get('attachments', []) |
| 130 | + } |
123 | 131 | return item |
124 | 132 |
|
125 | | - with ThreadPoolExecutor(file_upload_thread_count) as executor: |
126 | | - futures = [ |
127 | | - executor.submit(upload_if_necessary, item) for item in items |
128 | | - ] |
129 | | - items = [future.result() for future in as_completed(futures)] |
130 | | - |
131 | | - def convert_item(item): |
132 | | - # Don't make any changes to tms data |
133 | | - if "tileLayerUrl" in item: |
134 | | - return item |
135 | | - # Convert string names to fields. |
136 | | - item = { |
137 | | - key if isinstance(key, Field) else DataRow.field(key): value |
138 | | - for key, value in item.items() |
139 | | - } |
| 133 | + def validate_attachments(item): |
| 134 | + attachments = item.get('attachments') |
| 135 | + if attachments: |
| 136 | + if isinstance(attachments, list): |
| 137 | + for attachment in attachments: |
| 138 | + Entity.AssetAttachment.validate_attachment_json( |
| 139 | + attachment) |
| 140 | + else: |
| 141 | + raise ValueError( |
| 142 | + f"Attachments must be a list. Found {type(attachments)}" |
| 143 | + ) |
| 144 | + return attachments |
| 145 | + |
| 146 | + def format_row(item): |
| 147 | + # Formats user input into a consistent dict structure |
| 148 | + if isinstance(item, dict): |
| 149 | + # Convert fields to strings |
| 150 | + item = { |
| 151 | + key.name if isinstance(key, Field) else key: value |
| 152 | + for key, value in item.items() |
| 153 | + } |
| 154 | + elif isinstance(item, str): |
| 155 | + # The main advantage of using a string over a dict is that the user is specifying |
| 156 | + # that the file should exist locally. |
| 157 | + # That info is lost after this section so we should check for it here. |
| 158 | + if not os.path.exists(item): |
| 159 | + raise ValueError(f"Filepath {item} does not exist.") |
| 160 | + item = {"row_data": item, "external_id": item} |
| 161 | + return item |
140 | 162 |
|
141 | | - if DataRow.row_data not in item: |
| 163 | + def validate_keys(item): |
| 164 | + if 'row_data' not in item: |
142 | 165 | raise InvalidQueryError( |
143 | | - "DataRow.row_data missing when creating DataRow.") |
| 166 | + "`row_data` missing when creating DataRow.") |
144 | 167 |
|
145 | | - invalid_keys = set(item) - set(DataRow.fields()) |
| 168 | + invalid_keys = set(item) - { |
| 169 | + *{f.name for f in DataRow.fields()}, 'attachments' |
| 170 | + } |
146 | 171 | if invalid_keys: |
147 | 172 | raise InvalidAttributeError(DataRow, invalid_keys) |
| 173 | + return item |
| 174 | + |
| 175 | + def convert_item(item): |
| 176 | + # Don't make any changes to tms data |
| 177 | + if "tileLayerUrl" in item: |
| 178 | + validate_attachments(item) |
| 179 | + return item |
| 180 | + # Convert all payload variations into the same dict format |
| 181 | + item = format_row(item) |
| 182 | + # Make sure required keys exist (and there are no extra keys) |
| 183 | + validate_keys(item) |
| 184 | + # Make sure attachments are valid |
| 185 | + validate_attachments(item) |
| 186 | + # Upload any local file paths |
| 187 | + item = upload_if_necessary(item) |
148 | 188 |
|
149 | | - # Item is valid, convert it to a dict {graphql_field_name: value} |
150 | | - # Need to change the name of DataRow.row_data to "data" |
151 | 189 | return { |
152 | | - "data" if key == DataRow.row_data else key.graphql_name: value |
| 190 | + "data" if key == "row_data" else utils.camel_case(key): value |
153 | 191 | for key, value in item.items() |
154 | 192 | } |
155 | 193 |
|
| 194 | + if not isinstance(items, list): |
| 195 | + raise ValueError( |
| 196 | + f"Must pass a list to create_data_rows. Found {type(items)}") |
| 197 | + |
| 198 | + with ThreadPoolExecutor(file_upload_thread_count) as executor: |
| 199 | + futures = [executor.submit(convert_item, item) for item in items] |
| 200 | + items = [future.result() for future in as_completed(futures)] |
| 201 | + |
156 | 202 | # Prepare and upload the descriptor file |
157 | | - items = [convert_item(item) for item in items] |
158 | 203 | data = json.dumps(items) |
159 | 204 | descriptor_url = self.client.upload_data(data) |
160 | | - |
161 | 205 | # Create data source |
162 | 206 | dataset_param = "datasetId" |
163 | 207 | url_param = "jsonUrl" |
164 | 208 | query_str = """mutation AppendRowsToDatasetPyApi($%s: ID!, $%s: String!){ |
165 | 209 | appendRowsToDataset(data:{datasetId: $%s, jsonFileUrl: $%s} |
166 | | - ){ taskId accepted } } """ % (dataset_param, url_param, |
167 | | - dataset_param, url_param) |
| 210 | + ){ taskId accepted errorMessage } } """ % (dataset_param, url_param, |
| 211 | + dataset_param, url_param) |
| 212 | + |
168 | 213 | res = self.client.execute(query_str, { |
169 | 214 | dataset_param: self.uid, |
170 | 215 | url_param: descriptor_url |
171 | 216 | }) |
172 | 217 | res = res["appendRowsToDataset"] |
173 | 218 | if not res["accepted"]: |
| 219 | + msg = res['errorMessage'] |
174 | 220 | raise InvalidQueryError( |
175 | | - "Server did not accept DataRow creation request") |
| 221 | + f"Server did not accept DataRow creation request. {msg}") |
176 | 222 |
|
177 | 223 | # Fetch and return the task. |
178 | 224 | task_id = res["taskId"] |
|
0 commit comments