@@ -57,6 +57,9 @@ def __init__(self, *args, **kwargs):
5757
5858 self .server = Server (self )
5959
60+ # For the 01. This lets the OAI compatible server accumulate context before responding.
61+ self .context_mode = False
62+
6063 async def input (self , chunk ):
6164 """
6265 Accumulates LMC chunks onto interpreter.messages.
@@ -773,6 +776,14 @@ async def chat_completion(request: ChatCompletionRequest):
773776 # Handle special STOP token
774777 return
775778
779+ if last_message .content == "{CONTEXT_MODE_ON}" :
780+ async_interpreter .context_mode = True
781+ return
782+
783+ if last_message .content == "{CONTEXT_MODE_OFF}" :
784+ async_interpreter .context_mode = False
785+ return
786+
776787 if type (last_message .content ) == str :
777788 async_interpreter .messages .append (
778789 {
@@ -812,7 +823,9 @@ async def chat_completion(request: ChatCompletionRequest):
812823 }
813824 )
814825
815- if os .getenv ("INTERPRETER_SERVER_REQUIRE_START" , False ):
826+ if async_interpreter .context_mode :
827+ # In context mode, we only respond if we received a {START} message
828+ # Otherwise, we're just accumulating context
816829 if last_message .content != "{START}" :
817830 return
818831 if async_interpreter .messages [- 1 ]["content" ] == "{START}" :
0 commit comments