1- import pathlib
2- from abc import ABC , abstractmethod
31from collections .abc import AsyncGenerator
42from typing import Any
53
64from openai import AsyncAzureOpenAI , AsyncOpenAI , AsyncStream
75from openai .types .chat import ChatCompletion , ChatCompletionChunk , ChatCompletionMessageParam
86from openai_messages_token_helper import build_messages , get_token_limit
9- from pydantic import BaseModel
107
118from fastapi_app .api_models import (
129 AIChatRoles ,
1815)
1916from fastapi_app .postgres_models import Item
2017from fastapi_app .postgres_searcher import PostgresSearcher
21-
22-
class ChatParams(BaseModel):
    """Validated parameters for a single RAG chat turn.

    Combines client-supplied overrides with defaults; produced by
    ``RAGChatBase.get_params`` and consumed by the concrete chat flows.
    """

    # Maximum number of search results to retrieve.
    top: int = 3
    # Sampling temperature for the chat completion call.
    temperature: float = 0.3
    # Token budget for the generated answer.
    response_token_limit: int = 1024
    # Which retrieval modes are active for this request.
    enable_text_search: bool
    enable_vector_search: bool
    # The latest user message, separated from the prior conversation.
    original_user_query: str
    past_messages: list[ChatCompletionMessageParam]
    # System prompt template used to generate the answer.
    prompt_template: str
33-
class RAGChatBase(ABC):
    """Abstract base class for RAG chat flows.

    Loads the shared prompt templates once at class-definition time and
    normalizes per-request override dicts into a :class:`ChatParams`.
    Concrete subclasses implement retrieval and the (streaming and
    non-streaming) chat completion calls.
    """

    current_dir = pathlib.Path(__file__).parent
    # read_text() opens and closes the file in one call; the previous
    # open(...).read() pattern leaked the file handle.
    query_prompt_template = (current_dir / "prompts/query.txt").read_text()
    answer_prompt_template = (current_dir / "prompts/answer.txt").read_text()

    def get_params(self, messages: list[ChatCompletionMessageParam], overrides: dict[str, Any]) -> ChatParams:
        """Build a ChatParams object from the chat history and request overrides.

        Args:
            messages: Full chat history; the last entry must be the user's
                current query with plain-string content.
            overrides: Per-request settings (``top``, ``temperature``,
                ``response_token_limit``, ``retrieval_mode``, ``prompt_template``).

        Returns:
            A populated ChatParams.

        Raises:
            ValueError: If the most recent message content is not a string.
        """
        top: int = overrides.get("top", 3)
        temperature: float = overrides.get("temperature", 0.3)
        # Previously hard-coded to 1024; now overridable while keeping the
        # same default, so existing callers are unaffected.
        response_token_limit: int = overrides.get("response_token_limit", 1024)
        prompt_template = overrides.get("prompt_template") or self.answer_prompt_template

        # A missing retrieval_mode (None) enables both search modes.
        enable_text_search = overrides.get("retrieval_mode") in ["text", "hybrid", None]
        enable_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]

        original_user_query = messages[-1]["content"]
        if not isinstance(original_user_query, str):
            raise ValueError("The most recent message content must be a string.")
        past_messages = messages[:-1]

        return ChatParams(
            top=top,
            temperature=temperature,
            response_token_limit=response_token_limit,
            prompt_template=prompt_template,
            enable_text_search=enable_text_search,
            enable_vector_search=enable_vector_search,
            original_user_query=original_user_query,
            past_messages=past_messages,
        )

    @abstractmethod
    async def run(
        self,
        messages: list[ChatCompletionMessageParam],
        overrides: dict[str, Any] = {},  # noqa: B006 - abstract; body never mutates it
    ) -> RetrievalResponse:
        """Run one non-streaming chat turn and return the full response."""
        raise NotImplementedError

    @abstractmethod
    async def retrieve_and_build_context(
        self,
        chat_params: ChatParams,
        *args,
        **kwargs,
    ) -> tuple[list[ChatCompletionMessageParam], list[Item]]:
        """Retrieve matching items and build the contextual message list."""
        raise NotImplementedError

    @abstractmethod
    async def run_stream(
        self,
        messages: list[ChatCompletionMessageParam],
        overrides: dict[str, Any] = {},  # noqa: B006 - abstract; body never mutates it
    ) -> AsyncGenerator[RetrievalResponseDelta, None]:
        """Run one streaming chat turn, yielding response deltas."""
        raise NotImplementedError
        # Unreachable: the bare yield marks this abstract method as an async
        # generator so type checkers accept the AsyncGenerator return type.
        if False:
            yield 0
18+ from fastapi_app .rag_base import ChatParams , RAGChatBase
9019
9120
9221class SimpleRAGChat (RAGChatBase ):
@@ -104,7 +33,7 @@ def __init__(
10433 self .chat_deployment = chat_deployment
10534 self .chat_token_limit = get_token_limit (chat_model , default_to_minimum = True )
10635
107- async def retreive_and_build_context (
36+ async def retrieve_and_build_context (
10837 self , chat_params : ChatParams
10938 ) -> tuple [list [ChatCompletionMessageParam ], list [Item ]]:
11039 """Retrieve relevant items from the database and build a context for the chat model."""
@@ -138,9 +67,7 @@ async def run(
13867 ) -> RetrievalResponse :
13968 chat_params = self .get_params (messages , overrides )
14069
141- # Retrieve relevant items from the database
142- # Generate a contextual and content specific answer using the search results and chat history
143- contextual_messages , results = await self .retreive_and_build_context (chat_params = chat_params )
70+ contextual_messages , results = await self .retrieve_and_build_context (chat_params = chat_params )
14471
14572 chat_completion_response : ChatCompletion = await self .openai_chat_client .chat .completions .create (
14673 # Azure OpenAI takes the deployment name as the model name
@@ -192,9 +119,7 @@ async def run_stream(
192119 ) -> AsyncGenerator [RetrievalResponseDelta , None ]:
193120 chat_params = self .get_params (messages , overrides )
194121
195- # Retrieve relevant items from the database
196- # Generate a contextual and content specific answer using the search results and chat history
197- contextual_messages , results = await self .retreive_and_build_context (chat_params = chat_params )
122+ contextual_messages , results = await self .retrieve_and_build_context (chat_params = chat_params )
198123
199124 chat_completion_async_stream : AsyncStream [
200125 ChatCompletionChunk
0 commit comments