
Commit 68d9bb6
Author: Luiza Hagemann
Parent: e7b3f7e

fix: include additional usage documentation

File tree: 11 files changed (+97, -15 lines)

samples/rfp-answer-generation/README.md
Lines changed: 2 additions & 0 deletions

@@ -14,6 +14,8 @@ The project is powered by Large Language Models (LLM) available on Amazon Bedrock
 
 The solution is divided into 2 stacks: Ingestion and Inference. Additionally, this project provides you with a sample front-end application that allows you to upload and process new RFPs, and interact with the answers provided by the LLM: you can edit, approve and download the answers as a new XLSX file.
 
+You can also inspect the solution [data flow](docs/images/data-flow.png).
+
 ### Ingestion stack
 
 The ingestion stack uses the [Generative AI CDK Constructs](https://github.com/awslabs/generative-ai-cdk-constructs) Knowledge Bases and OpenSearch Serverless L2 constructs to deploy a fully-managed data ingestion pipeline powered by Anthropic Claude LLMs.
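Editor's note: the sample's actual stack code is not part of this diff, so the snippet below is only a rough sketch of how a Knowledge Base backed by an OpenSearch Serverless vector collection can be wired up with the Generative AI CDK Constructs library in Python. The construct and parameter names, the embeddings model, and the bucket are assumptions for illustration, not the sample's real stack definition.

```python
# Hypothetical sketch of an ingestion stack using the Generative AI CDK Constructs
# (cdklabs.generative-ai-cdk-constructs). Construct names and parameters are
# assumptions for illustration; see the sample's backend/ for the real stack.
from aws_cdk import Stack, aws_s3 as s3
from cdklabs.generative_ai_cdk_constructs import bedrock, opensearchserverless
from constructs import Construct


class IngestionStack(Stack):
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # Vector store for the Knowledge Base (OpenSearch Serverless L2 construct).
        collection = opensearchserverless.VectorCollection(self, "RFPCollection")

        # Knowledge Base that embeds the ingested Q&A chunks.
        kb = bedrock.KnowledgeBase(
            self,
            "RFPKnowledgeBase",
            embeddings_model=bedrock.BedrockFoundationModel.TITAN_EMBED_TEXT_V1,
            vector_store=collection,
            instruction="Answers from previously completed RFP questionnaires.",
        )

        # Bucket holding the processed RFP chunks (name is illustrative).
        documents_bucket = s3.Bucket(self, "RFPDocumentsBucket")
        bedrock.S3DataSource(
            self,
            "RFPDataSource",
            bucket=documents_bucket,
            knowledge_base=kb,
            data_source_name="rfp-answers",
        )
```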

samples/rfp-answer-generation/backend/README.md
Lines changed: 16 additions & 1 deletion

@@ -96,4 +96,19 @@ To deploy the `Inference` stack directly into your AWS account:
 ```shell
 $ cdk deploy RFPAnswers-InferenceStack --require-approval=never --verbose
 ```
-
+
+## Clean up
+
+Do not forget to delete the stacks to avoid unexpected charges.
+
+First, make sure to remove all data from the Amazon Simple Storage Service (Amazon S3) buckets.
+
+```shell
+$ cdk destroy RFPAnswers-IngestionStack
+```
+
+```shell
+$ cdk destroy RFPAnswers-InferenceStack
+```
+
+Delete all the associated logs created by the different services in Amazon CloudWatch Logs.
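Editor's note: for the bucket-emptying step described above, here is a minimal boto3 sketch that deletes all objects (and object versions, if versioning is enabled) before running `cdk destroy`. The bucket names are placeholders; the real names are generated when the stacks are deployed.

```python
# Minimal sketch for emptying the sample's S3 buckets before `cdk destroy`.
# The bucket names below are placeholders -- look them up in the CloudFormation
# outputs or the S3 console for your deployment.
import boto3

s3 = boto3.resource("s3")


def empty_bucket(bucket_name: str) -> None:
    """Delete every object and, if versioning is enabled, every object version."""
    bucket = s3.Bucket(bucket_name)
    bucket.objects.all().delete()           # current objects
    bucket.object_versions.all().delete()   # versions and delete markers, if any


for name in ["<ingestion-documents-bucket>", "<inference-output-bucket>"]:
    empty_bucket(name)
```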

samples/rfp-answer-generation/backend/stack/lambdas/shared/processor.py
Lines changed: 24 additions & 11 deletions
@@ -11,7 +11,9 @@
 # and limitations under the License.
 #
 
+import logging
 import numpy as np
+import os
 import pandas as pd
 
 from botocore.config import Config
@@ -20,6 +22,9 @@
 
 from .utils import extract_items_from_tagged_list, get_bedrock_runtime
 
+logger = logging.getLogger()
+logger.setLevel(os.getenv("LOG_LEVEL", "INFO"))
+
 PROMPT_TEMPLATE = """
 You have received a previously answered Request For Proposal (RFP) document. Your task is to extract, ipsis literis, ALL of the questions and answers from every sheet in the document. Provide your answer enclosed by <rfp></rfp> tags.
 
@@ -35,13 +40,13 @@
 
 3. If the document contains instructions from the requester, do not extract them.
 
-4. Identify each group of questions in the RFP and the topic name for the group. The document format can very, and the topic can be specified in a column beside the question or in a single cell before a group of questions. Identify the topic name with <topic_name></topic_name> tags.
+4. Identify each group of questions in the RFP and the topic name for the group. The document format can vary, and the topic can be specified in a column beside the question or in a single cell before a group of questions. If the topic name is specified in a column alongside the question and the answer, group questions with the same topics together. Identify the topic name with <topic_name></topic_name> tags.
 
 5. Identify each question and its answer.
 
 6. Under the <topic></topic> tags, enclose the question in <question></question> tags.
 
-7. Under the <topic></topic> tags, enclose the answer in <answer></answer> tags.
+7. Under the <topic></topic> tags, enclose the answer in <answer></answer> tags. If there is no answer to the question, add an empty <answer></answer> tag.
 
 Output Example:
 <rfp>
@@ -51,6 +56,15 @@
 <answer>Oktank LDTA</answer>
 </topic>
 </rfp>
+
+Output Example:
+<rfp>
+<topic>
+<topic_name>Company Information</topic_name>
+<question>Total number of employees</question>
+<answer></answer>
+</topic>
+</rfp>
 """
 
 
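Editor's note: the prompt asks the model to return its extraction inside nested XML-style tags, which the module then unpacks with the project's `extract_items_from_tagged_list` helper (not shown in this diff). As a rough illustration of how such tagged output can be parsed, here is a hypothetical regex-based sketch; the helper's real implementation and signature may differ.

```python
import re

# Hypothetical parser for the <rfp><topic>...</topic></rfp> output format shown
# in the prompt's examples. The project's own extract_items_from_tagged_list
# helper is not part of this diff, so this is an illustration only.
SAMPLE_OUTPUT = """
<rfp>
<topic>
<topic_name>Company Information</topic_name>
<question>Total number of employees</question>
<answer></answer>
</topic>
</rfp>
"""


def extract_tag(text: str, tag: str) -> list[str]:
    """Return the contents of every <tag>...</tag> pair, including empty ones."""
    return re.findall(rf"<{tag}>(.*?)</{tag}>", text, flags=re.DOTALL)


for topic in extract_tag(SAMPLE_OUTPUT, "topic"):
    topic_name = extract_tag(topic, "topic_name")
    questions = extract_tag(topic, "question")
    answers = extract_tag(topic, "answer")
    print(topic_name, list(zip(questions, answers)))
```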
@@ -71,7 +85,7 @@ class BedrockKBProcessor:
     def __init__(
         self,
         model_id="anthropic.claude-3-5-sonnet-20240620-v1:0",
-        separator=",",
+        separator=";",
     ):
         self.model_id: str = model_id
         self.separator: str = separator
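Editor's note: after this change, CSV inputs are semicolon-delimited by default. A brief, hypothetical usage sketch of `BedrockKBProcessor` follows; the import path, file name, and date are illustrative, not taken from the sample.

```python
# Hypothetical usage of BedrockKBProcessor after the separator default change.
from shared.processor import BedrockKBProcessor  # illustrative import path

processor = BedrockKBProcessor()  # defaults: Claude 3.5 Sonnet, separator=";"

# process_file(filename, file_date) returns a list of RFPChunk question/answer pairs.
chunks = processor.process_file("previous_rfp.xlsx", "2024-06-01")
print(f"{len(chunks)} chunks extracted")

# For a comma-separated CSV, override the default:
csv_processor = BedrockKBProcessor(separator=",")
```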
@@ -100,10 +114,9 @@ def process_file(self, filename: str, file_date: str) -> list[RFPChunk]:
                 "date": file_date,
             }
             for q, a in zip(questions, answers)
-            if a
         ]
 
-        print(f"Extracted: {len(extracted_qa)} QA pairs.")
+        logger.info(f"Extracted: {len(extracted_qa)} QA pairs.")
 
         rfp_chunks += extracted_qa
 
@@ -160,13 +173,13 @@ def __extract_data_concurrent(self, filename):
         return [r.result() for r in as_completed(futures)]
 
     def __load_file_in_sections(self, filename: str) -> list[pd.DataFrame]:
-        print(filename)
+        logger.info(filename)
         extension: str = filename.split(".")[-1].upper()
-        print(f"File extension: {extension}")
+        logger.info(f"File extension: {extension}")
         sections: list[pd.DataFrame] = []
 
         if extension == "XLSX":
-            print("Loaded XLSX")
+            logger.info("Loaded XLSX")
             sheets_dict = pd.read_excel(
                 filename, sheet_name=None, header=None, engine="openpyxl"
             )
@@ -175,10 +188,10 @@ def __load_file_in_sections(self, filename: str) -> list[pd.DataFrame]:
                 sections += np.split(sheet, sheet[sheet.isnull().all(1)].index)
 
         else:
-            print("Loaded CSV")
-            sheet = pd.read_csv(filename, header=None, sep=self.separator)
+            logger.info("Loaded CSV")
+            sheet = pd.read_csv(filename, header=0, sep=self.separator)
             sections += np.split(sheet, sheet[sheet.isnull().all(1)].index)
 
-        print(f"Loaded {len(sections)} sheets")
+        logger.info(f"Loaded {len(sections)} sheets")
 
         return sections
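Editor's note: the `np.split(sheet, sheet[sheet.isnull().all(1)].index)` expression above splits a sheet into sections wherever a fully blank row appears. A small self-contained sketch of that behaviour on synthetic data (the DataFrame contents are made up for illustration):

```python
import numpy as np
import pandas as pd

# Synthetic sheet: two question blocks separated by a fully blank row.
sheet = pd.DataFrame(
    [
        ["Company Information", "Company name?", "Oktank LDTA"],
        ["Company Information", "Total number of employees", None],
        [None, None, None],  # blank row acts as a section separator
        ["Security", "Is data encrypted at rest?", "Yes"],
    ]
)

# Same expression as in __load_file_in_sections: split at the indices of all-null rows.
sections = np.split(sheet, sheet[sheet.isnull().all(1)].index)

for i, section in enumerate(sections):
    # Every section after the first still begins with the blank separator row;
    # downstream code can drop all-null rows before prompting the model.
    print(f"Section {i}: {len(section.dropna(how='all'))} non-empty rows")
```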

samples/rfp-answer-generation/backend/stack/stack_constructs/sfn_step_answer_question.py
Lines changed: 1 addition & 0 deletions

@@ -67,6 +67,7 @@ def __init__(
             ],
             resources=[
                 f"arn:aws:bedrock:{Stack.of(self).region}::foundation-model/anthropic.claude-3-5-sonnet-20240620-v1:0",
+                f"arn:aws:bedrock:{Stack.of(self).region}::foundation-model/anthropic.claude-3-haiku-20240307-v1:0",
             ],
         )
     )
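Editor's note: this hunk widens the IAM policy so the Step Functions answer step can invoke Claude 3 Haiku as well as Claude 3.5 Sonnet. A hedged CDK sketch of such a statement is shown below; the actions list and the role it attaches to are assumptions, since the diff only shows the added resource ARN.

```python
# Hypothetical sketch of the widened Bedrock invoke permission. The actions and
# the role are assumptions; the diff only shows the added resource ARN.
from aws_cdk import Stack
from aws_cdk import aws_iam as iam


def grant_bedrock_invoke(stack: Stack, role: iam.IRole) -> None:
    role.add_to_principal_policy(
        iam.PolicyStatement(
            actions=["bedrock:InvokeModel"],  # assumed; not visible in this hunk
            resources=[
                f"arn:aws:bedrock:{Stack.of(stack).region}::foundation-model/anthropic.claude-3-5-sonnet-20240620-v1:0",
                f"arn:aws:bedrock:{Stack.of(stack).region}::foundation-model/anthropic.claude-3-haiku-20240307-v1:0",
            ],
        )
    )
```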
Binary image files changed (content not shown): 112 KB, 319 KB, 406 KB, 520 KB, 510 KB, 443 KB.
