|
31 | 31 | Inputs: |
32 | 32 | - requests_filepath : str |
33 | 33 | - path to the file containing the requests to be processed |
34 | | - - file should be a jsonl file, where each line is a json object with API parameters |
35 | | - - e.g., {"model": "text-embedding-ada-002", "input": "embed me"} |
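| 34 | + - file should be a jsonl file, where each line is a json object with API parameters and an optional "metadata" field (the metadata is removed from the request parameters and, when non-empty, saved alongside the result) |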
| 35 | + - e.g., {"model": "text-embedding-ada-002", "input": "embed me", "metadata": {"row_id": 1}} |
36 | 36 | - as with all jsonl files, take care that newlines in the content are properly escaped (json.dumps does this automatically) |
37 | 37 | - an example file is provided at examples/data/example_requests_to_parallel_process.jsonl |
38 | 38 | - the code to generate the example file is appended to the bottom of this script |
@@ -164,6 +164,7 @@ async def process_api_requests_from_file( |
164 | 164 | request_json=request_json, |
165 | 165 | token_consumption=num_tokens_consumed_from_request(request_json, api_endpoint, token_encoding_name), |
166 | 166 | attempts_left=max_attempts, |
| 167 | + metadata=request_json.pop("metadata", None) |
167 | 168 | ) |
168 | 169 | status_tracker.num_tasks_started += 1 |
169 | 170 | status_tracker.num_tasks_in_progress += 1 |
@@ -258,6 +259,7 @@ class APIRequest: |
258 | 259 | request_json: dict |
259 | 260 | token_consumption: int |
260 | 261 | attempts_left: int |
| 262 | + metadata: dict |
261 | 263 | result: list = field(default_factory=list) |
262 | 264 |
|
263 | 265 | async def call_api( |
@@ -298,11 +300,21 @@ async def call_api( |
298 | 300 | retry_queue.put_nowait(self) |
299 | 301 | else: |
300 | 302 | logging.error(f"Request {self.request_json} failed after all attempts. Saving errors: {self.result}") |
301 | | - append_to_jsonl([self.request_json, [str(e) for e in self.result]], save_filepath) |
| 303 | + data = ( |
| 304 | + [self.request_json, [str(e) for e in self.result], self.metadata] |
| 305 | + if self.metadata |
| 306 | + else [self.request_json, [str(e) for e in self.result]] |
| 307 | + ) |
| 308 | + append_to_jsonl(data, save_filepath) |
302 | 309 | status_tracker.num_tasks_in_progress -= 1 |
303 | 310 | status_tracker.num_tasks_failed += 1 |
304 | 311 | else: |
305 | | - append_to_jsonl([self.request_json, response], save_filepath) |
| 312 | + data = ( |
| 313 | + [self.request_json, response, self.metadata] |
| 314 | + if self.metadata |
| 315 | + else [self.request_json, response] |
| 316 | + ) |
| 317 | + append_to_jsonl(data, save_filepath) |
306 | 318 | status_tracker.num_tasks_in_progress -= 1 |
307 | 319 | status_tracker.num_tasks_succeeded += 1 |
308 | 320 | logging.debug(f"Request {self.task_id} saved to {save_filepath}") |
|
0 commit comments