@@ -359,22 +359,26 @@ def bulk_delete(
359359 return result
360360
361361
362- def bulk_index (client : OpenSearch , index : str , records : Iterator [dict ]) -> dict [str , int ]:
362+ def bulk_index (
363+ client : OpenSearch , index : str , records : Iterator [dict ], action : str
364+ ) -> dict [str , int ]:
363365 """Indexes records into an existing index using the streaming bulk helper.
364366
365- This action function uses the OpenSearch "index" action, which is a
366- combination of create and update: if a record with the same _id exists in the
367- index, it will be updated. If it does not exist, the record will be indexed as a
368- new document.
367+ This method uses the OpenSearch "index" and "update" operations.
368+ - Setting `action` to "index" will either create or update a record.
369+ If a record with the same _id exists in the index, it will be updated;
370+ if it does not exist, the record will be added as a new document.
371+ - Setting `action` to "update" will update a document only if it exists
372+ in the index. Otherwise, the failure is logged and counted as an error.
369373
370- If an error occurs during record indexing , it will be logged and bulk indexing will
371- continue until all records have been processed.
374+ If an error occurs during the operation , it will be logged, and the bulk
375+ operation will continue until all records have been processed.
372376
373377 Returns total sums of: records created, records updated, errors, and total records
374378 processed.
375379 """
376380 result = {"created" : 0 , "updated" : 0 , "errors" : 0 , "total" : 0 }
377- actions = helpers .generate_bulk_actions (index , records , "index" )
381+ actions = helpers .generate_bulk_actions (index , records , action )
378382 responses = streaming_bulk (
379383 client ,
380384 actions ,
@@ -400,34 +404,17 @@ def bulk_index(client: OpenSearch, index: str, records: Iterator[dict]) -> dict[
400404 result ["updated" ] += 1
401405 else :
402406 logger .error (
403- "Something unexpected happened during ingest. Bulk index response: %s" ,
407+ "Something unexpected happened during ingest. "
408+ f"Bulk { action } response: %s" ,
404409 json .dumps (response ),
405410 )
406411 result ["errors" ] += 1
407412 result ["total" ] += 1
408413 if result ["total" ] % int (os .getenv ("STATUS_UPDATE_INTERVAL" , "1000" )) == 0 :
409- logger .info ("Status update: %s records indexed so far!" , result ["total" ])
414+ logger .info ("Status update: %s records processed so far!" , result ["total" ])
410415 logger .info ("All records ingested, refreshing index." )
411416 response = client .indices .refresh (
412417 index = index ,
413418 )
414419 logger .debug (response )
415420 return result
416-
417-
418- def bulk_update (
419- client : OpenSearch , index : str , records : Iterator [dict ]
420- ) -> dict [str , int ]:
421- result = {"updated" : 0 , "errors" : 0 , "total" : 0 }
422- actions = helpers .generate_bulk_actions (index , records , "update" )
423- responses = streaming_bulk (
424- client ,
425- actions ,
426- max_chunk_bytes = REQUEST_CONFIG ["OPENSEARCH_BULK_MAX_CHUNK_BYTES" ],
427- raise_on_error = False ,
428- )
429- for response in responses :
430- # TODO: Parse different responses from bulk operations
431- pass
432- logger .debug (response )
433- return result
0 commit comments