Skip to content

Commit 73ff7e4

Browse files
authored
Merge pull request #90 from seungwon2/master
[Fix]: fix error raised when no parent documents are found
2 parents 4165bda + 41c90b0 commit 73ff7e4

File tree

1 file changed

+48
-47
lines changed

1 file changed

+48
-47
lines changed

genai/aws-gen-ai-kr/utils/rag_summit.py

Lines changed: 48 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -614,56 +614,57 @@ def _get_hyde_response(query, prompt, llm_text):
614614
# ParentDocument based
615615
def get_parent_document_similar_docs(cls, **kwargs):
616616

617-
child_search_results = kwargs["similar_docs"]
618-
619-
parent_info, similar_docs = {}, []
620-
for rank, (doc, score) in enumerate(child_search_results):
621-
parent_id = doc.metadata["parent_id"]
622-
if parent_id != "NA": ## For Tables and Images
623-
if parent_id not in parent_info:
624-
parent_info[parent_id] = (rank+1, score)
625-
else:
626-
if kwargs["hybrid"]:
627-
similar_docs.append((doc, score))
617+
child_search_results = kwargs["similar_docs"]
618+
619+
parent_info, similar_docs = {}, []
620+
for rank, (doc, score) in enumerate(child_search_results):
621+
parent_id = doc.metadata["parent_id"]
622+
if parent_id != "NA": ## For Tables and Images
623+
if parent_id not in parent_info:
624+
parent_info[parent_id] = (rank+1, score)
628625
else:
629-
similar_docs.append((doc))
630-
631-
parent_ids = sorted(parent_info.items(), key=lambda x: x[1], reverse=False)
632-
parent_ids = list(map(lambda x:x[0], parent_ids))
633-
634-
parent_docs = opensearch_utils.get_documents_by_ids(
635-
os_client=kwargs["os_client"],
636-
ids=parent_ids,
637-
index_name=kwargs["index_name"],
638-
)
639-
640-
if parent_docs["docs"]:
641-
for res in parent_docs["docs"]:
642-
doc_id = res["_id"]
643-
doc = Document(
644-
page_content=res["_source"]["text"],
645-
metadata=res["_source"]["metadata"]
626+
if kwargs["hybrid"]:
627+
similar_docs.append((doc, score))
628+
else:
629+
similar_docs.append((doc))
630+
631+
parent_ids = sorted(parent_info.items(), key=lambda x: x[1], reverse=False)
632+
parent_ids = list(map(lambda x:x[0], parent_ids))
633+
634+
if parent_ids:
635+
parent_docs = opensearch_utils.get_documents_by_ids(
636+
os_client=kwargs["os_client"],
637+
ids=parent_ids,
638+
index_name=kwargs["index_name"],
646639
)
647-
if kwargs["hybrid"]:
648-
similar_docs.append((doc, parent_info[doc_id][1]))
649-
else:
650-
similar_docs.append((doc))
651-
652-
if kwargs["hybrid"]:
653-
similar_docs = sorted(
654-
similar_docs,
655-
key=lambda x: x[1],
656-
reverse=True
657-
)
658-
659-
if kwargs["verbose"]:
660-
print("===== ParentDocument =====")
661-
print (f'filter: {kwargs["boolean_filter"]}')
662-
print (f'# child_docs: {len(child_search_results)}')
663-
print (f'# parent docs: {len(similar_docs)}')
664-
print (f'# duplicates: {len(child_search_results)-len(similar_docs)}')
665640

666-
return similar_docs
641+
if parent_docs["docs"]:
642+
for res in parent_docs["docs"]:
643+
doc_id = res["_id"]
644+
doc = Document(
645+
page_content=res["_source"]["text"],
646+
metadata=res["_source"]["metadata"]
647+
)
648+
if kwargs["hybrid"]:
649+
similar_docs.append((doc, parent_info[doc_id][1]))
650+
else:
651+
similar_docs.append((doc))
652+
653+
if kwargs["hybrid"]:
654+
similar_docs = sorted(
655+
similar_docs,
656+
key=lambda x: x[1],
657+
reverse=True
658+
)
659+
660+
if kwargs["verbose"]:
661+
print("===== ParentDocument =====")
662+
print (f'filter: {kwargs["boolean_filter"]}')
663+
print (f'# child_docs: {len(child_search_results)}')
664+
print (f'# parent docs: {len(similar_docs)}')
665+
print (f'# duplicates: {len(child_search_results)-len(similar_docs)}')
666+
667+
return similar_docs
667668

668669
@classmethod
669670
def get_rerank_docs(cls, **kwargs):

0 commit comments

Comments
 (0)