Skip to content

Commit 134cc1a

Browse files
committed
Use Cross-region Inference; Retry request in case of Bedrock throttling
1 parent d0aa6bc commit 134cc1a

File tree

3 files changed

+45
-3
lines changed

3 files changed

+45
-3
lines changed

samples/contract-compliance-analysis/back-end/app_properties.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ language: English
1414

1515
# Claude Model ID (global configuration). Set this to switch to a smaller language model for cost savings.
1616
# Disabling this property lets each prompt execution fall back to its default model ID
17-
claude_model_id: anthropic.claude-3-haiku-20240307-v1:0
17+
claude_model_id: us.anthropic.claude-3-haiku-20240307-v1:0
1818

1919
# Thresholds determine the maximum number of clauses with risk that a contract can have without requiring human review,
2020
# per risk level

samples/contract-compliance-analysis/back-end/stack/sfn/common-layer/llm.py

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
import logging
1616
import os
1717

18+
from retrying import retry
19+
from botocore.config import Config
20+
from botocore.exceptions import ClientError
1821
from langchain_aws import ChatBedrock
1922
from langchain_core.messages import HumanMessage
2023
from langchain_core.prompts import ChatPromptTemplate
@@ -24,8 +27,45 @@
2427
logger = logging.getLogger()
2528
logger.setLevel(os.getenv("LOG_LEVEL", "INFO"))
2629

27-
bedrock_client = boto3.client('bedrock-runtime')
30+
# Client-side retry policy handed to botocore for every Bedrock runtime call.
_bedrock_retry_policy = {
    "max_attempts": 50,
    "mode": "adaptive",
}

# Connection and read timeouts are both raised to 180 so that slow model
# responses are not cut off by the client.
_bedrock_client_config = Config(
    connect_timeout=180,
    read_timeout=180,
    retries=_bedrock_retry_policy,
)

# Shared Bedrock runtime client used by the LLM helpers in this module.
bedrock_client = boto3.client('bedrock-runtime', config=_bedrock_client_config)
2838

39+
class BedrockRetryableError(Exception):
    """Signal that a Bedrock call failed with an error that is worth retrying.

    Raised in place of the original Bedrock error so that a retry policy can
    recognise retryable failures by type. The message carries the text of the
    original error.
    """
42+
43+
@retry(
    wait_fixed=10000,  # 10 seconds between retries
    stop_max_attempt_number=None,  # Keep retrying indefinitely
    retry_on_exception=lambda ex: isinstance(ex, BedrockRetryableError),
)
def invoke_chain_with_retry(chain):
    """Invoke a chain, retrying while Bedrock reports throttling or a model timeout.

    Args:
        chain: Object exposing ``invoke``; it is called with an empty input
            mapping (``chain.invoke({})``).

    Returns:
        Whatever ``chain.invoke({})`` returns.

    Raises:
        BedrockRetryableError: Raised internally for ``ThrottlingException``
            and ``ModelTimeoutException`` so the ``@retry`` decorator re-runs
            the call. Because no attempt limit is configured, callers should
            not normally see it.
        ClientError: Any other Bedrock client error is re-raised unchanged.
    """
    # Error codes worth retrying, mapped to the warning logged for each.
    retryable_messages = {
        "ThrottlingException": "Bedrock throttling. Retrying...",
        "ModelTimeoutException": "Bedrock ModelTimeoutException. Retrying...",
    }

    try:
        return chain.invoke({})
    except ClientError as exc:
        # botocore's modeled service exceptions (for example
        # client.exceptions.ThrottlingException) are subclasses of ClientError,
        # so this single handler covers them; separate handlers placed after
        # it would never run.
        logger.warning(f"Bedrock ClientError: {exc}")

        # Use .get() so a malformed error payload re-raises the original
        # ClientError instead of masking it with a KeyError.
        error_code = exc.response.get("Error", {}).get("Code")
        if error_code in retryable_messages:
            logger.warning(retryable_messages[error_code])
            # Chain the original error so its traceback is preserved.
            raise BedrockRetryableError(str(exc)) from exc
        raise
2969

3070
def invoke_llm(prompt, model_id, temperature=0.5, top_k=None, top_p=0.8, max_new_tokens=4096, verbose=False):
3171
model_id = (model_id or CLAUDE_MODEL_ID)
@@ -57,7 +97,7 @@ def invoke_llm(prompt, model_id, temperature=0.5, top_k=None, top_p=0.8, max_new
5797
])
5898
chain = prompt | chat
5999

60-
response = chain.invoke({})
100+
response = invoke_chain_with_retry(chain)
61101
content = response.content
62102

63103
usage_data = None
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
retrying==1.3.4
2+
botocore==1.38.9

0 commit comments

Comments
 (0)