@@ -28,26 +28,22 @@ class DescriptorFileCreator:
 
     Args:
         client (Client): The client object
-        is_upsert (bool): Whether the upload is an upsert. This is a legacy parameter and should always be True because this class will only support upsert
         max_chunk_size_bytes (int): The maximum size of the file in bytes
-
-    TODO: Remove is_upsert parameter
     """
 
     def __init__(self, client: "Client"):
        self.client = client
+    """
+    This method is used to convert a list to JSON and upload it in a file to GCS.
+    It will create multiple files if the size of the upload is greater than max_chunk_size_bytes,
+    uploads the files to GCS in parallel, and returns a list of URLs.
 
-    """"
-    This method is used to convert a list to json and upload it in a file to gcs.
-    It will create multiple files if the size of upload is greater than max_chunk_size_bytes in bytes,
-    It uploads the files to gcs in parallel, and return a list of urls
-
-    Args:
-        items: The list to upload
-        is_upsert (bool): Whether the upload is an upsert
-        max_attachments_per_data_row (int): The maximum number of attachments per data row
-        max_chunk_size_bytes (int): The maximum size of the file in bytes
-    """
+    Args:
+        items: The list to upload
+        is_upsert (bool): Whether the upload is an upsert
+        max_attachments_per_data_row (int): The maximum number of attachments per data row
+        max_chunk_size_bytes (int): The maximum size of the file in bytes
+    """
 
     def create(self,
                items,
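
For context, a minimal usage sketch of the post-change `create` path follows. The `Client` construction and item dicts are illustrative only, and `DescriptorFileCreator` is assumed to be in scope or importable from its module; this is not code from the diff itself.

```python
# Hypothetical usage of DescriptorFileCreator.create after this change.
# Upsert behavior is now implied, so no is_upsert argument is passed.
from labelbox import Client

client = Client(api_key="<YOUR_API_KEY>")  # illustrative credentials
creator = DescriptorFileCreator(client)    # assumed importable from its module

items = [
    {"row_data": "https://example.com/image-1.jpg", "global_key": "img-1"},
    {"row_data": "https://example.com/image-2.jpg", "global_key": "img-2"},
]

# Returns one GCS URL per uploaded chunk.
urls = creator.create(items, max_chunk_size_bytes=10_000_000)
```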
@@ -56,7 +52,7 @@ def create(self,
         is_upsert = True  # This class will only support upsert use cases
         items = self._prepare_items_for_upload(items,
                                                max_attachments_per_data_row,
-                                               is_upsert)
+                                               is_upsert=is_upsert)
         json_chunks = self._chunk_down_by_bytes(items, max_chunk_size_bytes)
         with ThreadPoolExecutor(FILE_UPLOAD_THREAD_COUNT) as executor:
             futures = [
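
The fan-out above is the standard `ThreadPoolExecutor`/`as_completed` pattern. A self-contained sketch of the same idea, with a stubbed `upload_chunk` standing in for the real GCS call:

```python
# Standalone sketch of the parallel-upload pattern used in create().
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List

FILE_UPLOAD_THREAD_COUNT = 20  # assumption; the real constant is defined elsewhere in the module

def upload_chunk(chunk: str) -> str:
    # Stub for self.client.upload_data(...); returns a fake URL.
    return f"gs://bucket/{abs(hash(chunk))}.json"

def upload_all(json_chunks: List[str]) -> List[str]:
    with ThreadPoolExecutor(FILE_UPLOAD_THREAD_COUNT) as executor:
        futures = [executor.submit(upload_chunk, c) for c in json_chunks]
        # as_completed yields in completion order, so the returned URLs
        # are not guaranteed to match the input chunk order.
        return [f.result() for f in as_completed(futures)]
```

The completion-order note also applies to `create` itself: callers should not assume the returned URL list is ordered like the input chunks.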
@@ -66,14 +62,10 @@ def create(self,
             ]
             return [future.result() for future in as_completed(futures)]
 
-    def create_one(self,
-                   items,
-                   max_attachments_per_data_row=None,
-                   is_upsert=False) -> List[str]:
+    def create_one(self, items, max_attachments_per_data_row=None) -> List[str]:
         items = self._prepare_items_for_upload(items,
-                                               max_attachments_per_data_row,
-                                               is_upsert)
-        # Prepare and upload the desciptor file
+                                               max_attachments_per_data_row)
+        # Prepare and upload the descriptor file
         data = json.dumps(items)
         return self.client.upload_data(data,
                                        content_type="application/json",
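
`create_one` keeps the single-file path: the prepared items are JSON-stringified and uploaded as one descriptor file, with no chunking. A hedged usage sketch (the `creator` object is the one constructed in the earlier sketch; item contents are illustrative):

```python
# Hypothetical single-file upload after this change; no is_upsert flag.
items = [{"row_data": "https://example.com/image-1.jpg"}]
result = creator.create_one(items)  # annotated as List[str] in the signature
```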
@@ -84,8 +76,7 @@ def _prepare_items_for_upload(self,
                                   max_attachments_per_data_row=None,
                                   is_upsert=False):
         """
-        This function is shared by `Dataset.create_data_rows`, `Dataset.create_data_rows_sync` and `Dataset.update_data_rows`.
-        It is used to prepare the input file. The user defined input is validated, processed, and json stringified.
+        This function is used to prepare the input file. The user-defined input is validated, processed, and JSON-stringified.
         Finally the json data is uploaded to gcs and a uri is returned. This uri can be passed as a parameter to a mutation that uploads data rows
 
         Each element in `items` can be either a `str` or a `dict`. If
@@ -109,9 +100,6 @@ def _prepare_items_for_upload(self,
         >>>     {DataRow.row_data: {"type": ..., 'version': ..., 'messages': [...]}}
         >>> ])
 
-        For an example showing how to upload tiled data_rows see the following notebook:
-        https://github.com/Labelbox/labelbox-python/blob/ms/develop/model_assisted_labeling/tiled_imagery_mal.ipynb
-
         Args:
             items (iterable of (dict or str)): See above for details.
             max_attachments_per_data_row (Optional[int]): Param used during attachment validation to determine
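
The str-or-dict contract described above can be made concrete with a small sketch of accepted item shapes (values are made up; `DataRow` is the labelbox schema class referenced in the docstring examples):

```python
# Illustrative item shapes for _prepare_items_for_upload, per the docstring:
# a plain str is treated as a path/URL, a dict carries explicit DataRow fields.
from labelbox import DataRow

items = [
    "/tmp/local_file.jpg",                                # str: local path or URL
    {DataRow.row_data: "https://example.com/image.jpg"},  # dict keyed by DataRow fields
    {DataRow.row_data: {"type": "...", "version": "...", "messages": []}},  # conversational payload
]
```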
@@ -305,7 +293,7 @@ def _chunk_down_by_bytes(self, items: List[dict],
                              max_chunk_size: int) -> Generator[str, None, None]:
         """
         Recursively chunks down a list of items into smaller lists until each list is less than or equal to max_chunk_size bytes
-        NOTE: of one data row is large than max_chunk_size, it will be returned as one chunk
+        NOTE: if one data row is larger than max_chunk_size, it will be returned as one chunk
 
         Returns:
             Generator[str, None, None]: A generator that yields a json string
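
The chunking contract (split recursively until each chunk serializes under the limit, but never split a single oversized row) can be sketched independently of the class; this is a minimal reimplementation of the idea, not the library's exact code:

```python
# Minimal sketch of byte-bounded recursive chunking with the same contract
# as _chunk_down_by_bytes: halve the list until each half fits, but yield a
# single oversized row as its own chunk rather than failing.
import json
from typing import Generator, List

def chunk_down_by_bytes(items: List[dict],
                        max_chunk_size: int) -> Generator[str, None, None]:
    data = json.dumps(items)
    if len(data.encode("utf-8")) <= max_chunk_size or len(items) == 1:
        yield data  # fits, or is one row that cannot be split further
        return
    mid = len(items) // 2
    yield from chunk_down_by_bytes(items[:mid], max_chunk_size)
    yield from chunk_down_by_bytes(items[mid:], max_chunk_size)
```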