
Commit b94d0e7

Merge branch 'main' into feature/existing-iam-role-template
2 parents 6639c31 + 37374e7

4 files changed: +53, -8 lines

docker-compose.yml

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+version: '3.8'
+
+services:
+  bedrock-access-gateway:
+    build:
+      context: ./src
+      dockerfile: Dockerfile_ecs
+    ports:
+      - "127.0.0.1:8000:8080"
+    environment:
+      - ENABLE_PROMPT_CACHING=true
+      - API_KEY=${OPENAI_API_KEY}
+      - AWS_PROFILE
+      - AWS_ACCESS_KEY_ID
+      - AWS_SECRET_ACCESS_KEY
+      - AWS_SESSION_TOKEN
+    volumes:
+      - ${HOME}/.aws:/home/appuser/.aws
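
A quick way to smoke-test the composed service is to point the OpenAI SDK at the published port. The sketch below is illustrative, not part of the commit: it assumes the gateway exposes its OpenAI-compatible routes under /api/v1 (the project default) and that the key matches the API_KEY the container was started with; the model ID is a placeholder for whichever Bedrock model is enabled in your account.

# Illustrative smoke test against the gateway published on 127.0.0.1:8000.
from openai import OpenAI

client = OpenAI(
    base_url="http://127.0.0.1:8000/api/v1",  # assumed default route prefix
    api_key="my-gateway-key",                 # must match API_KEY above
)

response = client.chat.completions.create(
    model="anthropic.claude-3-sonnet-20240229-v1:0",  # placeholder model ID
    messages=[{"role": "user", "content": "Say hello."}],
)
print(response.choices[0].message.content)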

scripts/push-to-ecr.sh

Lines changed: 3 additions & 0 deletions
@@ -7,6 +7,9 @@ set -o errexit # exit on first error
 set -o nounset # exit on using unset variables
 set -o pipefail # exit on any error in a pipeline
 
+# Change to the directory where the script is located
+cd "$(dirname "$0")"
+
 # Prompt user for inputs
 echo "================================================"
 echo "Bedrock Access Gateway - Build and Push to ECR"

src/Dockerfile_ecs

Lines changed: 1 addition & 1 deletion
@@ -21,6 +21,6 @@ RUN python3 -c 'import tiktoken_ext.openai_public as tke; tke.cl100k_base()'
 ENV PORT=8080
 
 HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
-    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8080/health').read()"
+    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:${PORT}/health').read()"
 
 CMD ["sh", "-c", "uvicorn api.app:app --host 0.0.0.0 --port ${PORT}"]

src/api/models/bedrock.py

Lines changed: 31 additions & 7 deletions
@@ -964,11 +964,13 @@ def _create_response_stream(
         finish_reason = None
         message = None
         usage = None
+
         if "messageStart" in chunk:
             message = ChatResponseMessage(
                 role=chunk["messageStart"]["role"],
                 content="",
             )
+
         if "contentBlockStart" in chunk:
             # tool call start
             delta = chunk["contentBlockStart"]["start"]
@@ -988,25 +990,30 @@ def _create_response_stream(
                     )
                 ]
             )
+
         if "contentBlockDelta" in chunk:
             delta = chunk["contentBlockDelta"]["delta"]
             if "text" in delta:
-                # stream content
-                message = ChatResponseMessage(
-                    content=delta["text"],
-                )
+                # Regular text content - close thinking tag if open
+                content = delta["text"]
+                if self.think_emitted:
+                    # Transition from reasoning to regular text
+                    content = "</think>" + content
+                    self.think_emitted = False
+                message = ChatResponseMessage(content=content)
             elif "reasoningContent" in delta:
                 if "text" in delta["reasoningContent"]:
                     content = delta["reasoningContent"]["text"]
                     if not self.think_emitted:
-                        # Port of "content_block_start" with "thinking"
+                        # Start of reasoning content
                         content = "<think>" + content
                         self.think_emitted = True
                     message = ChatResponseMessage(content=content)
                 elif "signature" in delta["reasoningContent"]:
-                    # Port of "signature_delta"
+                    # Port of "signature_delta" (for models that send it)
                     if self.think_emitted:
-                        message = ChatResponseMessage(content="\n </think> \n\n")
+                        message = ChatResponseMessage(content="</think>")
+                        self.think_emitted = False
                     else:
                         return None # Ignore signature if no <think> started
             else:
@@ -1022,7 +1029,23 @@ def _create_response_stream(
                     )
                 ]
             )
+
         if "messageStop" in chunk:
+            # Safety check: Close any open thinking tags before message stops
+            if self.think_emitted:
+                self.think_emitted = False
+                return ChatStreamResponse(
+                    id=message_id,
+                    model=model_id,
+                    choices=[
+                        ChoiceDelta(
+                            index=0,
+                            delta=ChatResponseMessage(content="</think>"),
+                            logprobs=None,
+                            finish_reason=None,
+                        )
+                    ],
+                )
             message = ChatResponseMessage()
             finish_reason = chunk["messageStop"]["stopReason"]

@@ -1063,6 +1086,7 @@ def _create_response_stream(
                     prompt_tokens_details=prompt_tokens_details,
                 ),
             )
+
         if message:
             return ChatStreamResponse(
                 id=message_id,
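
Taken together, the bedrock.py changes form a small state machine around self.think_emitted: the first reasoningContent delta opens a <think> tag, and the tag is closed by the first regular text delta, by a signature delta, or, as a safety net, at messageStop. Distilled out of the streaming handler (the names below are illustrative, not the module's actual API), the transitions look roughly like this:

# Illustrative distillation of the think-tag handling above.
from typing import Optional


class ThinkTagTracker:
    def __init__(self) -> None:
        self.think_emitted = False  # is a <think> tag currently open?

    def on_reasoning_text(self, text: str) -> str:
        # The first reasoning delta opens the tag.
        if not self.think_emitted:
            self.think_emitted = True
            return "<think>" + text
        return text

    def on_regular_text(self, text: str) -> str:
        # Regular content closes a still-open tag before resuming.
        if self.think_emitted:
            self.think_emitted = False
            return "</think>" + text
        return text

    def on_signature(self) -> Optional[str]:
        # A signature delta also closes the tag; it is ignored otherwise.
        if self.think_emitted:
            self.think_emitted = False
            return "</think>"
        return None

    def on_message_stop(self) -> Optional[str]:
        # Safety net: never leave a dangling <think> when the message ends.
        return self.on_signature()


tracker = ThinkTagTracker()
print(tracker.on_reasoning_text("step 1"))  # "<think>step 1"
print(tracker.on_regular_text("Answer."))   # "</think>Answer."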
