 
 from fastapi_app.api_models import (
     AIChatRoles,
-    ChatRequestOverrides,
     Message,
     RAGContext,
     RetrievalResponse,
@@ -63,10 +62,15 @@ async def generate_search_query(
 
         return query_messages, query_text, filters
 
-    async def retrieve_and_build_context(
-        self, chat_params: ChatParams, query_text: str | Any | None, filters: list
-    ) -> tuple[list[ChatCompletionMessageParam], list[Item]]:
-        """Retrieve relevant items from the database and build a context for the chat model."""
+    async def prepare_context(
+        self, chat_params: ChatParams
+    ) -> tuple[list[ChatCompletionMessageParam], list[Item], list[ThoughtStep]]:
+        query_messages, query_text, filters = await self.generate_search_query(
+            original_user_query=chat_params.original_user_query,
+            past_messages=chat_params.past_messages,
+            query_response_token_limit=500,
+        )
+
         # Retrieve relevant items from the database with the GPT optimized query
         results = await self.searcher.search_and_embed(
             query_text,
@@ -88,28 +92,41 @@ async def retrieve_and_build_context(
             max_tokens=self.chat_token_limit - chat_params.response_token_limit,
             fallback_to_default=True,
         )
-        return contextual_messages, results
 
-    async def run(
+        thoughts = [
+            ThoughtStep(
+                title="Prompt to generate search arguments",
+                description=[str(message) for message in query_messages],
+                props=(
+                    {"model": self.chat_model, "deployment": self.chat_deployment}
+                    if self.chat_deployment
+                    else {"model": self.chat_model}
+                ),
+            ),
+            ThoughtStep(
+                title="Search using generated search arguments",
+                description=query_text,
+                props={
+                    "top": chat_params.top,
+                    "vector_search": chat_params.enable_vector_search,
+                    "text_search": chat_params.enable_text_search,
+                    "filters": filters,
+                },
+            ),
+            ThoughtStep(
+                title="Search results",
+                description=[result.to_dict() for result in results],
+            ),
+        ]
+        return contextual_messages, results, thoughts
+
+    async def answer(
         self,
-        messages: list[ChatCompletionMessageParam],
-        overrides: ChatRequestOverrides,
+        chat_params: ChatParams,
+        contextual_messages: list[ChatCompletionMessageParam],
+        results: list[Item],
+        earlier_thoughts: list[ThoughtStep],
     ) -> RetrievalResponse:
-        chat_params = self.get_params(messages, overrides)
-
-        # Generate an optimized keyword search query based on the chat history and the last question
-        query_messages, query_text, filters = await self.generate_search_query(
-            original_user_query=chat_params.original_user_query,
-            past_messages=chat_params.past_messages,
-            query_response_token_limit=500,
-        )
-
-        # Retrieve relevant items from the database with the GPT optimized query
-        # Generate a contextual and content specific answer using the search results and chat history
-        contextual_messages, results = await self.retrieve_and_build_context(
-            chat_params=chat_params, query_text=query_text, filters=filters
-        )
-
         chat_completion_response: ChatCompletion = await self.openai_chat_client.chat.completions.create(
             # Azure OpenAI takes the deployment name as the model name
             model=self.chat_deployment if self.chat_deployment else self.chat_model,
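
At this point the shape of the refactor is visible: the old run() body has been split into a prepare_context() phase and an answer() phase. Below is a minimal sketch of how a caller might now compose the two. It assumes the get_params() helper seen in the removed code still exists, presumably on a shared base class that takes over the orchestration (which would also explain why ChatRequestOverrides is no longer imported in this module); neither assumption is confirmed by this diff.

    # Hypothetical orchestration; get_params() and the location of this method
    # are assumptions carried over from the removed run() body.
    async def run(
        self,
        messages: list[ChatCompletionMessageParam],
        overrides: ChatRequestOverrides,
    ) -> RetrievalResponse:
        chat_params = self.get_params(messages, overrides)
        # Phase 1: generate the search query, retrieve items, record thought steps
        contextual_messages, results, thoughts = await self.prepare_context(chat_params)
        # Phase 2: generate the grounded answer, appending to the earlier thoughts
        return await self.answer(chat_params, contextual_messages, results, thoughts)
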
@@ -126,30 +143,8 @@ async def run(
             ),
             context=RAGContext(
                 data_points={item.id: item.to_dict() for item in results},
-                thoughts=[
-                    ThoughtStep(
-                        title="Prompt to generate search arguments",
-                        description=[str(message) for message in query_messages],
-                        props=(
-                            {"model": self.chat_model, "deployment": self.chat_deployment}
-                            if self.chat_deployment
-                            else {"model": self.chat_model}
-                        ),
-                    ),
-                    ThoughtStep(
-                        title="Search using generated search arguments",
-                        description=query_text,
-                        props={
-                            "top": chat_params.top,
-                            "vector_search": chat_params.enable_vector_search,
-                            "text_search": chat_params.enable_text_search,
-                            "filters": filters,
-                        },
-                    ),
-                    ThoughtStep(
-                        title="Search results",
-                        description=[result.to_dict() for result in results],
-                    ),
+                thoughts=earlier_thoughts
+                + [
                     ThoughtStep(
                         title="Prompt to generate answer",
                         description=[str(message) for message in contextual_messages],
@@ -163,23 +158,13 @@ async def run(
             ),
         )
 
-    async def run_stream(
+    async def answer_stream(
         self,
-        messages: list[ChatCompletionMessageParam],
-        overrides: ChatRequestOverrides,
+        chat_params: ChatParams,
+        contextual_messages: list[ChatCompletionMessageParam],
+        results: list[Item],
+        earlier_thoughts: list[ThoughtStep],
     ) -> AsyncGenerator[RetrievalResponseDelta, None]:
-        chat_params = self.get_params(messages, overrides)
-
-        query_messages, query_text, filters = await self.generate_search_query(
-            original_user_query=chat_params.original_user_query,
-            past_messages=chat_params.past_messages,
-            query_response_token_limit=500,
-        )
-
-        contextual_messages, results = await self.retrieve_and_build_context(
-            chat_params=chat_params, query_text=query_text, filters=filters
-        )
-
         chat_completion_async_stream: AsyncStream[
             ChatCompletionChunk
         ] = await self.openai_chat_client.chat.completions.create(
@@ -192,38 +177,11 @@ async def run_stream(
             stream=True,
         )
 
-        # Forcefully close the database session before yielding the response
-        # Yielding keeps the connection open while streaming the response until the end
-        # The connection closes when it returns back to the context manager in the dependencies
-        await self.searcher.db_session.close()
-
         yield RetrievalResponseDelta(
             context=RAGContext(
                 data_points={item.id: item.to_dict() for item in results},
-                thoughts=[
-                    ThoughtStep(
-                        title="Prompt to generate search arguments",
-                        description=[str(message) for message in query_messages],
-                        props=(
-                            {"model": self.chat_model, "deployment": self.chat_deployment}
-                            if self.chat_deployment
-                            else {"model": self.chat_model}
-                        ),
-                    ),
-                    ThoughtStep(
-                        title="Search using generated search arguments",
-                        description=query_text,
-                        props={
-                            "top": chat_params.top,
-                            "vector_search": chat_params.enable_vector_search,
-                            "text_search": chat_params.enable_text_search,
-                            "filters": filters,
-                        },
-                    ),
-                    ThoughtStep(
-                        title="Search results",
-                        description=[result.to_dict() for result in results],
-                    ),
+                thoughts=earlier_thoughts
+                + [
                     ThoughtStep(
                         title="Prompt to generate answer",
                         description=[str(message) for message in contextual_messages],
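
The streaming path composes the same way. Note that this hunk also drops the explicit db_session.close() that previously ran before yielding; where (or whether) the session now gets closed is not shown in this diff. A sketch of the streaming orchestration under the same assumptions as the run() sketch above:

    # Hypothetical streaming counterpart; get_params() and the location of this
    # method are assumptions, as above.
    async def run_stream(
        self,
        messages: list[ChatCompletionMessageParam],
        overrides: ChatRequestOverrides,
    ) -> AsyncGenerator[RetrievalResponseDelta, None]:
        chat_params = self.get_params(messages, overrides)
        contextual_messages, results, thoughts = await self.prepare_context(chat_params)
        # The old run_stream() closed self.searcher.db_session before yielding;
        # if that is still required, it would have to happen somewhere like here.
        async for delta in self.answer_stream(chat_params, contextual_messages, results, thoughts):
            yield delta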