@@ -614,56 +614,57 @@ def _get_hyde_response(query, prompt, llm_text):
614614 # ParentDocument based
def get_parent_document_similar_docs(cls, **kwargs):
    """Swap child-chunk search hits for the parent documents they point to.

    Each hit's ``doc.metadata["parent_id"]`` is inspected. Hits whose
    parent id is ``"NA"`` are kept as they are; every other hit is
    collapsed into its parent, which is fetched once per distinct id via
    ``opensearch_utils.get_documents_by_ids``. A parent carries the score
    of the first child hit that referenced it.

    Keyword Args:
        similar_docs: Sequence of ``(doc, score)`` pairs from the child
            search.
        hybrid: If truthy, the result holds ``(doc, score)`` tuples sorted
            by score, highest first. Otherwise it holds bare documents:
            the ``"NA"`` hits in input order, followed by the parents in
            the order the lookup returned them.
        os_client: Forwarded to ``opensearch_utils.get_documents_by_ids``.
            Only read when at least one hit has a parent.
        index_name: Forwarded to the same lookup; same condition.
        verbose: Optional. If truthy, print a short summary.
        boolean_filter: Optional. Only echoed in the verbose summary.

    Returns:
        list: The documents (or ``(doc, score)`` tuples) described above.

    Raises:
        KeyError: If ``similar_docs`` or ``hybrid`` is not supplied, or if
            a parent is needed and ``os_client``/``index_name`` is missing.
    """
    child_search_results = kwargs["similar_docs"]
    hybrid = kwargs["hybrid"]

    # parent_id -> score of the first child hit that referenced it. Dicts
    # keep insertion order, so the keys are already in child-rank order
    # and no separate rank bookkeeping or sort is needed.
    parent_scores, similar_docs = {}, []
    for doc, score in child_search_results:
        parent_id = doc.metadata["parent_id"]
        if parent_id == "NA":
            # For tables and images: there is no parent, keep the hit itself.
            similar_docs.append((doc, score) if hybrid else doc)
        elif parent_id not in parent_scores:
            parent_scores[parent_id] = score

    parent_ids = list(parent_scores)

    # Skip the lookup entirely when no hit has a parent, so the helper is
    # never called with an empty id list.
    if parent_ids:
        parent_docs = opensearch_utils.get_documents_by_ids(
            os_client=kwargs["os_client"],
            ids=parent_ids,
            index_name=kwargs["index_name"],
        )

        # NOTE(review): assumes every returned entry carries "_source" and
        # an "_id" that was requested -- confirm against get_documents_by_ids.
        for res in parent_docs["docs"] or ():
            parent_doc = Document(
                page_content=res["_source"]["text"],
                metadata=res["_source"]["metadata"],
            )
            if hybrid:
                similar_docs.append((parent_doc, parent_scores[res["_id"]]))
            else:
                similar_docs.append(parent_doc)

    if hybrid:
        # Stable sort: entries with equal scores keep their relative order.
        similar_docs.sort(key=lambda pair: pair[1], reverse=True)

    # Diagnostics must never break retrieval, so both keys are optional.
    if kwargs.get("verbose", False):
        print("===== ParentDocument =====")
        print(f'filter: {kwargs.get("boolean_filter")}')
        print(f'# child_docs: {len(child_search_results)}')
        print(f'# parent docs: {len(similar_docs)}')
        print(f'# duplicates: {len(child_search_results) - len(similar_docs)}')

    return similar_docs
667668
668669 @classmethod
669670 def get_rerank_docs (cls , ** kwargs ):
0 commit comments