22import logging
33import time
44from pathlib import Path
5+ from typing import Any
56from typing import BinaryIO
7+ from typing import Dict
68from typing import Iterable
9+ from typing import Set
710from typing import Tuple
811from typing import Union
912
@@ -217,6 +220,7 @@ def create_from_objects(cls, client, project_id: str, name: str,
217220 Returns:
218221 BulkImportRequest object
219222 """
223+ _validate_ndjson (predictions )
220224 data_str = ndjson .dumps (predictions )
221225 data = data_str .encode ('utf-8' )
222226 file_name = _make_file_name (project_id , name )
@@ -264,8 +268,7 @@ def create_from_local_file(cls,
264268 # by iterating through the file so we only store
265269 # each line in memory rather than the entire file
266270 try :
267- for line in reader :
268- pass
271+ _validate_ndjson (reader )
269272 except ValueError :
270273 raise ValueError (f"{ file } is not a valid ndjson file" )
271274 else :
@@ -274,3 +277,21 @@ def create_from_local_file(cls,
274277 response_data = _send_create_file_command (client , request_data ,
275278 file_name , file_data )
276279 return cls (client , response_data ["createBulkImportRequest" ])
280+
281+
class NdjsonError(Exception):
    """Root of the ndjson-related exception types declared in this module.

    Carries no behavior of its own; more specific errors (such as
    ``UuidError``) derive from it.
    """
284+
285+
class UuidError(NdjsonError):
    """Signals a problem with a record's ``uuid`` value.

    Raised by ``_validate_ndjson`` when the same uuid appears more than
    once within a single import payload.
    """
288+
289+
def _validate_ndjson(lines: Iterable[Dict[str, Any]]) -> None:
    """Verify that no two records in an ndjson payload share a uuid.

    The iterable is walked exactly once, so a streaming reader can be
    passed in without holding every record in memory; only the uuid
    values seen so far are retained.

    Args:
        lines: Iterable of decoded records, each indexable by ``'uuid'``.

    Raises:
        UuidError: If a uuid value has already been seen earlier in
            ``lines``.
        KeyError: If a record has no ``'uuid'`` entry (propagated
            unchanged from the lookup).
    """
    seen: Set[str] = set()
    for record in lines:
        identifier = record['uuid']
        if identifier not in seen:
            seen.add(identifier)
            continue
        # Second occurrence of the same uuid: reject the whole payload.
        raise UuidError(f'{identifier} already used in this import job, '
                        'must be unique for the project.')
0 commit comments