
Commit 68d9bb6
Author: Luiza Hagemann
Parent: e7b3f7e

fix: include additional usage documentation

File tree: 11 files changed (+97, -15 lines)

samples/rfp-answer-generation/README.md
Lines changed: 2 additions & 0 deletions

@@ -14,6 +14,8 @@ The project is powered by Large Language Models (LLM) available on Amazon Bedrock
 
 The solution is divided into 2 stacks: Ingestion and Inference. Additionally, this project provides you with a sample front-end application that allows you to upload and process new RFPs, and interact with the answers provided by the LLM: you can edit, approve and download the answers as a new XLSX file.
 
+You can also inspect the solution [data flow](docs/images/data-flow.png).
+
 ### Ingestion stack
 
 The ingestion stack uses the [Generative AI CDK Constructs](https://github.com/awslabs/generative-ai-cdk-constructs) Knowledge Bases and OpenSearch Serverless L2 constructs to deploy a fully-managed data ingestion pipeline powered by Anthropic Claude LLMs.
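Editor's note: the sample's actual stack code is not part of this diff, so the snippet below is only a rough sketch of how a Knowledge Base backed by an OpenSearch Serverless vector collection can be wired up with the Generative AI CDK Constructs library in Python. The construct and parameter names, the embeddings model, and the bucket are assumptions for illustration, not the sample's real stack definition.

```python
# Hypothetical sketch of an ingestion stack using the Generative AI CDK Constructs
# (cdklabs.generative-ai-cdk-constructs). Construct names and parameters are
# assumptions for illustration; see the sample's backend/ for the real stack.
from aws_cdk import Stack, aws_s3 as s3
from cdklabs.generative_ai_cdk_constructs import bedrock, opensearchserverless
from constructs import Construct


class IngestionStack(Stack):
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # Vector store for the Knowledge Base (OpenSearch Serverless L2 construct).
        collection = opensearchserverless.VectorCollection(self, "RFPCollection")

        # Knowledge Base that embeds the ingested Q&A chunks.
        kb = bedrock.KnowledgeBase(
            self,
            "RFPKnowledgeBase",
            embeddings_model=bedrock.BedrockFoundationModel.TITAN_EMBED_TEXT_V1,
            vector_store=collection,
            instruction="Answers from previously completed RFP questionnaires.",
        )

        # Bucket holding the processed RFP chunks (name is illustrative).
        documents_bucket = s3.Bucket(self, "RFPDocumentsBucket")
        bedrock.S3DataSource(
            self,
            "RFPDataSource",
            bucket=documents_bucket,
            knowledge_base=kb,
            data_source_name="rfp-answers",
        )
```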

samples/rfp-answer-generation/backend/README.md
Lines changed: 16 additions & 1 deletion

@@ -96,4 +96,19 @@ To deploy the `Inference` stack directly into your AWS account:
 ```shell
 $ cdk deploy RFPAnswers-InferenceStack --require-approval=never --verbose
 ```
-
+
+## Clean up
+
+Do not forget to delete the stacks to avoid unexpected charges.
+
+First, make sure to remove all data from the Amazon Simple Storage Service (Amazon S3) buckets.
+
+```shell
+$ cdk destroy RFPAnswers-IngestionStack
+```
+
+```shell
+$ cdk destroy RFPAnswers-InferenceStack
+```
+
+Delete all the associated logs created by the different services in Amazon CloudWatch Logs.
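Editor's note: for the bucket-emptying step described above, here is a minimal boto3 sketch that deletes all objects (and object versions, if versioning is enabled) before running `cdk destroy`. The bucket names are placeholders; the real names are generated when the stacks are deployed.

```python
# Minimal sketch for emptying the sample's S3 buckets before `cdk destroy`.
# The bucket names below are placeholders -- look them up in the CloudFormation
# outputs or the S3 console for your deployment.
import boto3

s3 = boto3.resource("s3")


def empty_bucket(bucket_name: str) -> None:
    """Delete every object and, if versioning is enabled, every object version."""
    bucket = s3.Bucket(bucket_name)
    bucket.objects.all().delete()           # current objects
    bucket.object_versions.all().delete()   # versions and delete markers, if any


for name in ["<ingestion-documents-bucket>", "<inference-output-bucket>"]:
    empty_bucket(name)
```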

samples/rfp-answer-generation/backend/stack/lambdas/shared/processor.py
Lines changed: 24 additions & 11 deletions
@@ -11,7 +11,9 @@
 # and limitations under the License.
 #
 
+import logging
 import numpy as np
+import os
 import pandas as pd
 
 from botocore.config import Config
@@ -20,6 +22,9 @@
 
 from .utils import extract_items_from_tagged_list, get_bedrock_runtime
 
+logger = logging.getLogger()
+logger.setLevel(os.getenv("LOG_LEVEL", "INFO"))
+
 PROMPT_TEMPLATE = """
 You have received a previously answered Request For Proposal (RFP) document. Your task is to extract, ipsis literis, ALL of the questions and answers from every sheet in the document. Provide your answer enclosed by <rfp></rfp> tags.
 
@@ -35,13 +40,13 @@
 
 3. If the document contains instructions from the requester, do not extract them.
 
-4. Identify each group of questions in the RFP and the topic name for the group. The document format can very, and the topic can be specified in a column beside the question or in a single cell before a group of questions. Identify the topic name with <topic_name></topic_name> tags.
+4. Identify each group of questions in the RFP and the topic name for the group. The document format can vary, and the topic can be specified in a column beside the question or in a single cell before a group of questions. If the topic name is specified in a column alongside the question and the answer, group questions with the same topics together. Identify the topic name with <topic_name></topic_name> tags.
 
 5. Identify each question and its answer.
 
 6. Under the <topic></topic> tags, enclose the question in <question></question> tags.
 
-7. Under the <topic></topic> tags, enclose the answer in <answer></answer> tags.
+7. Under the <topic></topic> tags, enclose the answer in <answer></answer> tags. If there is no answer to the question, add an empty <answer></answer> tag.
 
 Output Example:
 <rfp>
@@ -51,6 +56,15 @@
 <answer>Oktank LDTA</answer>
 </topic>
 </rfp>
+
+Output Example:
+<rfp>
+<topic>
+<topic_name>Company Information</topic_name>
+<question>Total number of employees</question>
+<answer></answer>
+</topic>
+</rfp>
 """
 
 
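Editor's note: the prompt asks the model to return its extraction inside nested XML-style tags, which the module then unpacks with the project's `extract_items_from_tagged_list` helper (not shown in this diff). As a rough illustration of how such tagged output can be parsed, here is a hypothetical regex-based sketch; the helper's real implementation and signature may differ.

```python
import re

# Hypothetical parser for the <rfp><topic>...</topic></rfp> output format shown
# in the prompt's examples. The project's own extract_items_from_tagged_list
# helper is not part of this diff, so this is an illustration only.
SAMPLE_OUTPUT = """
<rfp>
<topic>
<topic_name>Company Information</topic_name>
<question>Total number of employees</question>
<answer></answer>
</topic>
</rfp>
"""


def extract_tag(text: str, tag: str) -> list[str]:
    """Return the contents of every <tag>...</tag> pair, including empty ones."""
    return re.findall(rf"<{tag}>(.*?)</{tag}>", text, flags=re.DOTALL)


for topic in extract_tag(SAMPLE_OUTPUT, "topic"):
    topic_name = extract_tag(topic, "topic_name")
    questions = extract_tag(topic, "question")
    answers = extract_tag(topic, "answer")
    print(topic_name, list(zip(questions, answers)))
```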
@@ -71,7 +85,7 @@ class BedrockKBProcessor:
     def __init__(
         self,
         model_id="anthropic.claude-3-5-sonnet-20240620-v1:0",
-        separator=",",
+        separator=";",
     ):
         self.model_id: str = model_id
         self.separator: str = separator
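Editor's note: after this change, CSV inputs are semicolon-delimited by default. A brief, hypothetical usage sketch of `BedrockKBProcessor` follows; the import path, file name, and date are illustrative, not taken from the sample.

```python
# Hypothetical usage of BedrockKBProcessor after the separator default change.
from shared.processor import BedrockKBProcessor  # illustrative import path

processor = BedrockKBProcessor()  # defaults: Claude 3.5 Sonnet, separator=";"

# process_file(filename, file_date) returns a list of RFPChunk question/answer pairs.
chunks = processor.process_file("previous_rfp.xlsx", "2024-06-01")
print(f"{len(chunks)} chunks extracted")

# For a comma-separated CSV, override the default:
csv_processor = BedrockKBProcessor(separator=",")
```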
@@ -100,10 +114,9 @@ def process_file(self, filename: str, file_date: str) -> list[RFPChunk]:
                 "date": file_date,
             }
             for q, a in zip(questions, answers)
-            if a
         ]
 
-        print(f"Extracted: {len(extracted_qa)} QA pairs.")
+        logger.info(f"Extracted: {len(extracted_qa)} QA pairs.")
 
         rfp_chunks += extracted_qa
 
@@ -160,13 +173,13 @@ def __extract_data_concurrent(self, filename):
         return [r.result() for r in as_completed(futures)]
 
     def __load_file_in_sections(self, filename: str) -> list[pd.DataFrame]:
-        print(filename)
+        logger.info(filename)
         extension: str = filename.split(".")[-1].upper()
-        print(f"File extension: {extension}")
+        logger.info(f"File extension: {extension}")
         sections: list[pd.DataFrame] = []
 
         if extension == "XLSX":
-            print("Loaded XLSX")
+            logger.info("Loaded XLSX")
             sheets_dict = pd.read_excel(
                 filename, sheet_name=None, header=None, engine="openpyxl"
             )
@@ -175,10 +188,10 @@ def __load_file_in_sections(self, filename: str) -> list[pd.DataFrame]:
                 sections += np.split(sheet, sheet[sheet.isnull().all(1)].index)
 
         else:
-            print("Loaded CSV")
-            sheet = pd.read_csv(filename, header=None, sep=self.separator)
+            logger.info("Loaded CSV")
+            sheet = pd.read_csv(filename, header=0, sep=self.separator)
             sections += np.split(sheet, sheet[sheet.isnull().all(1)].index)
 
-        print(f"Loaded {len(sections)} sheets")
+        logger.info(f"Loaded {len(sections)} sheets")
 
         return sections
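Editor's note: the `np.split(sheet, sheet[sheet.isnull().all(1)].index)` expression above splits a sheet into sections wherever a fully blank row appears. A small self-contained sketch of that behaviour on synthetic data (the DataFrame contents are made up for illustration):

```python
import numpy as np
import pandas as pd

# Synthetic sheet: two question blocks separated by a fully blank row.
sheet = pd.DataFrame(
    [
        ["Company Information", "Company name?", "Oktank LDTA"],
        ["Company Information", "Total number of employees", None],
        [None, None, None],  # blank row acts as a section separator
        ["Security", "Is data encrypted at rest?", "Yes"],
    ]
)

# Same expression as in __load_file_in_sections: split at the indices of all-null rows.
sections = np.split(sheet, sheet[sheet.isnull().all(1)].index)

for i, section in enumerate(sections):
    # Every section after the first still begins with the blank separator row;
    # downstream code can drop all-null rows before prompting the model.
    print(f"Section {i}: {len(section.dropna(how='all'))} non-empty rows")
```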

samples/rfp-answer-generation/backend/stack/stack_constructs/sfn_step_answer_question.py
Lines changed: 1 addition & 0 deletions

@@ -67,6 +67,7 @@ def __init__(
             ],
             resources=[
                 f"arn:aws:bedrock:{Stack.of(self).region}::foundation-model/anthropic.claude-3-5-sonnet-20240620-v1:0",
+                f"arn:aws:bedrock:{Stack.of(self).region}::foundation-model/anthropic.claude-3-haiku-20240307-v1:0",
             ],
         )
     )
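Editor's note: this hunk widens the IAM policy so the Step Functions answer step can invoke Claude 3 Haiku as well as Claude 3.5 Sonnet. A hedged CDK sketch of such a statement is shown below; the actions list and the role it attaches to are assumptions, since the diff only shows the added resource ARN.

```python
# Hypothetical sketch of the widened Bedrock invoke permission. The actions and
# the role are assumptions; the diff only shows the added resource ARN.
from aws_cdk import Stack
from aws_cdk import aws_iam as iam


def grant_bedrock_invoke(stack: Stack, role: iam.IRole) -> None:
    role.add_to_principal_policy(
        iam.PolicyStatement(
            actions=["bedrock:InvokeModel"],  # assumed; not visible in this hunk
            resources=[
                f"arn:aws:bedrock:{Stack.of(stack).region}::foundation-model/anthropic.claude-3-5-sonnet-20240620-v1:0",
                f"arn:aws:bedrock:{Stack.of(stack).region}::foundation-model/anthropic.claude-3-haiku-20240307-v1:0",
            ],
        )
    )
```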
Binary image files changed (content not shown): 112 KB, 319 KB, 406 KB, 520 KB, 510 KB, 443 KB.
