@@ -209,6 +209,31 @@ def generate_response(
209209
210210 def _handle_bedrock_request (self , prompt : str , retry_with_reduced_tokens : bool ):
211211 """Handle Bedrock requests with retry logic"""
212+
213+ # Check for custom endpoint configuration
214+ custom_config = get_custom_endpoint_config (self .model_id , "bedrock" )
215+
216+ if custom_config :
217+ # Use custom AWS credentials
218+ from botocore .config import Config
219+ retry_config = Config (
220+ region_name = custom_config .aws_region ,
221+ retries = {"max_attempts" : 2 , "mode" : "standard" },
222+ connect_timeout = 5 ,
223+ read_timeout = 3600
224+ )
225+ bedrock_client = boto3 .client (
226+ 'bedrock-runtime' ,
227+ aws_access_key_id = custom_config .aws_access_key_id ,
228+ aws_secret_access_key = custom_config .aws_secret_access_key ,
229+ region_name = custom_config .aws_region ,
230+ config = retry_config
231+ )
232+ print (f"Using custom Bedrock endpoint for model: { self .model_id } " )
233+ else :
234+ # Fallback to default bedrock client (environment/IAM credentials)
235+ bedrock_client = self .bedrock_client
236+
212237 retries = 0
213238 last_exception = None
214239 new_max_tokens = 8192
@@ -228,7 +253,7 @@ def _handle_bedrock_request(self, prompt: str, retry_with_reduced_tokens: bool):
228253 "stopSequences" : ["\n \n Human:" ],
229254
230255 }
231- response = self . bedrock_client .converse (
256+ response = bedrock_client .converse (
232257 modelId = self .model_id ,
233258 messages = conversation ,
234259 inferenceConfig = inference_config ,
@@ -242,7 +267,7 @@ def _handle_bedrock_request(self, prompt: str, retry_with_reduced_tokens: bool):
242267 "stopSequences" : []
243268 }
244269 print (inference_config )
245- response = self . bedrock_client .converse (
270+ response = bedrock_client .converse (
246271 modelId = self .model_id ,
247272 messages = conversation ,
248273 inferenceConfig = inference_config
@@ -270,11 +295,29 @@ def _handle_bedrock_request(self, prompt: str, retry_with_reduced_tokens: bool):
270295 self ._exponential_backoff (retries )
271296 retries += 1
272297
273- # Create a new client on connection errors
274- self .bedrock_client = boto3 .client (
275- service_name = "bedrock-runtime" ,
276- config = self .bedrock_client .meta .config
277- )
298+ # Create a new client on connection errors
299+ if custom_config :
300+ # Recreate with custom credentials
301+ from botocore .config import Config
302+ retry_config = Config (
303+ region_name = custom_config .aws_region ,
304+ retries = {"max_attempts" : 2 , "mode" : "standard" },
305+ connect_timeout = 5 ,
306+ read_timeout = 3600
307+ )
308+ bedrock_client = boto3 .client (
309+ 'bedrock-runtime' ,
310+ aws_access_key_id = custom_config .aws_access_key_id ,
311+ aws_secret_access_key = custom_config .aws_secret_access_key ,
312+ region_name = custom_config .aws_region ,
313+ config = retry_config
314+ )
315+ else :
316+ # Recreate default client
317+ bedrock_client = boto3 .client (
318+ service_name = "bedrock-runtime" ,
319+ config = self .bedrock_client .meta .config
320+ )
278321 continue
279322
280323 # Handle other AWS errors
0 commit comments