diff --git a/.env.example b/.env.example
index 18b34cb7..bcc44911 100644
--- a/.env.example
+++ b/.env.example
@@ -1,2 +1,8 @@
-# Copy this file to .env and add your actual API key
+# Copy this file to .env and add your actual API keys
+
+# Company LLM Provider (Primary) - RDSec Internal API
+RDSEC_API_ENDPOINT=https://api.rdsec.trendmicro.com/prod/aiendpoint/v1
+RDSEC_API_KEY=your-rdsec-api-key-here
+
+# Legacy Anthropic API (Fallback)
 ANTHROPIC_API_KEY=your-anthropic-api-key-here
\ No newline at end of file
diff --git a/backend/ai_generator.py b/backend/ai_generator.py
index 0363ca90..1b716107 100644
--- a/backend/ai_generator.py
+++ b/backend/ai_generator.py
@@ -1,6 +1,167 @@
 import anthropic
+import json
+import openai
+import tiktoken
 from typing import List, Optional, Dict, Any
 
+class OpenAIGenerator:
+    """Handles interactions with the company's OpenAI-compatible LLM API for generating responses"""
+
+    # Static system prompt to avoid rebuilding on each call
+    SYSTEM_PROMPT = """You are an AI assistant specialized in course materials and educational content with access to a comprehensive search tool for course information.
+
+Search Tool Usage:
+- Use the search tool **only** for questions about specific course content or detailed educational materials
+- **One search per query maximum**
+- Synthesize search results into accurate, fact-based responses
+- If search yields no results, state this clearly without offering alternatives
+
+Response Protocol:
+- **General knowledge questions**: Answer using existing knowledge without searching
+- **Course-specific questions**: Search first, then answer
+- **No meta-commentary**:
+  - Provide direct answers only — no reasoning process, search explanations, or question-type analysis
+  - Do not mention "based on the search results"
+
+All responses must be:
+1. **Brief, concise, and focused** - Get to the point quickly
+2. **Educational** - Maintain instructional value
+3. **Clear** - Use accessible language
+4. **Example-supported** - Include relevant examples when they aid understanding
+Provide only the direct answer to what was asked.
+"""
+
+    def __init__(self, api_key: str, base_url: str, model: str):
+        self.client = openai.OpenAI(api_key=api_key, base_url=base_url)
+        self.model = model
+
+        # Pre-build base API parameters
+        self.base_params = {
+            "model": self.model,
+            "temperature": 0,
+            "max_tokens": 800
+        }
+
+        # Initialize tokenizer for usage tracking
+        try:
+            self.encoding = tiktoken.encoding_for_model(model)  # encoding for the configured model
+        except KeyError:
+            self.encoding = tiktoken.get_encoding("cl100k_base")  # fallback for models tiktoken doesn't know
+
+    def generate_response(self, query: str,
+                          conversation_history: Optional[str] = None,
+                          tools: Optional[List] = None,
+                          tool_manager=None) -> str:
+        """
+        Generate AI response with optional tool usage and conversation context.
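+
+        Example (illustrative only; names and values are placeholders, and the
+        tool dict uses the Anthropic format that _convert_tools_to_functions expects):
+
+            generator = OpenAIGenerator(api_key, base_url, "gpt-4o")
+            answer = generator.generate_response(
+                "What does lesson 3 cover?",
+                tools=[{"name": "search_course_content",
+                        "description": "Search course materials",
+                        "input_schema": {"type": "object",
+                                         "properties": {"query": {"type": "string"}},
+                                         "required": ["query"]}}],
+                tool_manager=tool_manager,
+            )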
+
+        Args:
+            query: The user's question or request
+            conversation_history: Previous messages for context
+            tools: Available tools the AI can use
+            tool_manager: Manager to execute tools
+
+        Returns:
+            Generated response as string
+        """
+
+        # Build system content, appending conversation history when present
+        system_content = (
+            f"{self.SYSTEM_PROMPT}\n\nPrevious conversation:\n{conversation_history}"
+            if conversation_history
+            else self.SYSTEM_PROMPT
+        )
+
+        # Prepare messages in OpenAI chat format
+        messages = [
+            {"role": "system", "content": system_content},
+            {"role": "user", "content": query}
+        ]
+
+        # Assemble API call parameters
+        api_params = {
+            **self.base_params,
+            "messages": messages
+        }
+
+        # Convert Anthropic tools to OpenAI functions format
+        if tools:
+            api_params["functions"] = self._convert_tools_to_functions(tools)
+            api_params["function_call"] = "auto"
+
+        # Get response from company LLM
+        response = self.client.chat.completions.create(**api_params)
+
+        # Handle function execution if needed
+        if response.choices[0].finish_reason == "function_call" and tool_manager:
+            return self._handle_function_execution(response, api_params, tool_manager)
+
+        # Return direct response
+        return response.choices[0].message.content
+
+    def _convert_tools_to_functions(self, tools: List) -> List[Dict]:
+        """Convert Anthropic tool format to OpenAI function format"""
+        functions = []
+        for tool in tools:
+            function = {
+                "name": tool["name"],
+                "description": tool["description"],
+                "parameters": tool["input_schema"]
+            }
+            functions.append(function)
+        return functions
+
+    def _handle_function_execution(self, initial_response, base_params: Dict[str, Any], tool_manager):
+        """
+        Handle execution of function calls and get follow-up response.
+
+        Args:
+            initial_response: The response containing function call requests
+            base_params: Base API parameters
+            tool_manager: Manager to execute tools
+
+        Returns:
+            Final response text after function execution
+        """
+        # Start with existing messages
+        messages = base_params["messages"].copy()
+
+        # Get the function call from the response
+        function_call = initial_response.choices[0].message.function_call
+
+        # Add the AI's function call turn
+        messages.append({
+            "role": "assistant",
+            "content": None,
+            "function_call": {
+                "name": function_call.name,
+                "arguments": function_call.arguments
+            }
+        })
+
+        # Execute the function call through the tool manager
+        function_args = json.loads(function_call.arguments)
+        function_result = tool_manager.execute_tool(function_call.name, **function_args)
+
+        # Add function result
+        messages.append({
+            "role": "function",
+            "name": function_call.name,
+            "content": str(function_result)
+        })
+
+        # Prepare final API call without functions
+        final_params = {
+            **self.base_params,
+            "messages": messages
+        }
+
+        # Get final response
+        final_response = self.client.chat.completions.create(**final_params)
+        return final_response.choices[0].message.content
+
+
 class AIGenerator:
     """Handles interactions with Anthropic's Claude API for generating responses"""
diff --git a/backend/config.py b/backend/config.py
index d9f6392e..28e23b65 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -8,7 +8,12 @@
 @dataclass
 class Config:
     """Configuration settings for the RAG system"""
-    # Anthropic API settings
+    # Company LLM API settings (following LLMProvider.md specifications)
+    RDSEC_API_ENDPOINT: str = os.getenv("RDSEC_API_ENDPOINT", "")
+    RDSEC_API_KEY: str = os.getenv("RDSEC_API_KEY", "")
+    RDSEC_MODEL: str = "gpt-4o"  # Using gpt-4o as specified
in LLMProvider.md + + # Legacy Anthropic settings (kept for fallback if needed) ANTHROPIC_API_KEY: str = os.getenv("ANTHROPIC_API_KEY", "") ANTHROPIC_MODEL: str = "claude-sonnet-4-20250514" diff --git a/backend/rag_system.py b/backend/rag_system.py index 50d848c8..47f3e765 100644 --- a/backend/rag_system.py +++ b/backend/rag_system.py @@ -2,7 +2,7 @@ import os from document_processor import DocumentProcessor from vector_store import VectorStore -from ai_generator import AIGenerator +from ai_generator import AIGenerator, OpenAIGenerator from session_manager import SessionManager from search_tools import ToolManager, CourseSearchTool from models import Course, Lesson, CourseChunk @@ -16,7 +16,21 @@ def __init__(self, config): # Initialize core components self.document_processor = DocumentProcessor(config.CHUNK_SIZE, config.CHUNK_OVERLAP) self.vector_store = VectorStore(config.CHROMA_PATH, config.EMBEDDING_MODEL, config.MAX_RESULTS) - self.ai_generator = AIGenerator(config.ANTHROPIC_API_KEY, config.ANTHROPIC_MODEL) + + # Initialize AI generator with company LLM as primary + if config.RDSEC_API_KEY and config.RDSEC_API_ENDPOINT: + print("Using company RDSec LLM provider") + self.ai_generator = OpenAIGenerator( + config.RDSEC_API_KEY, + config.RDSEC_API_ENDPOINT, + config.RDSEC_MODEL + ) + elif config.ANTHROPIC_API_KEY: + print("Falling back to Anthropic LLM provider") + self.ai_generator = AIGenerator(config.ANTHROPIC_API_KEY, config.ANTHROPIC_MODEL) + else: + raise ValueError("No valid LLM API key found. Please set RDSEC_API_KEY or ANTHROPIC_API_KEY in your .env file") + self.session_manager = SessionManager(config.MAX_HISTORY) # Initialize search tools diff --git a/pyproject.toml b/pyproject.toml index 3f05e2de..71f15468 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,6 +7,8 @@ requires-python = ">=3.13" dependencies = [ "chromadb==1.0.15", "anthropic==0.58.2", + "openai>=1.0.0", + "tiktoken>=0.5.0", "sentence-transformers==5.0.0", "fastapi==0.116.1", "uvicorn==0.35.0", diff --git a/run.sh b/run.sh index 80e3853d..0335c56e 100755 --- a/run.sh +++ b/run.sh @@ -10,7 +10,8 @@ if [ ! -d "backend" ]; then fi echo "Starting Course Materials RAG System..." 
-echo "Make sure you have set your ANTHROPIC_API_KEY in .env" +echo "Make sure you have set your RDSEC_API_KEY and RDSEC_API_ENDPOINT in .env" +echo "(Anthropic API key can be used as fallback)" # Change to backend directory and start the server cd backend && uv run uvicorn app:app --reload --port 8000 \ No newline at end of file diff --git a/uv.lock b/uv.lock index 9ae65c55..ea03961e 100644 --- a/uv.lock +++ b/uv.lock @@ -860,6 +860,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5d/15/d75fd66aba116ce3732bb1050401394c5ec52074c4f7ee18db8838dd4667/onnxruntime-1.22.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6e7e823624b015ea879d976cbef8bfaed2f7e2cc233d7506860a76dd37f8f381", size = 16477261, upload-time = "2025-07-10T19:16:03.226Z" }, ] +[[package]] +name = "openai" +version = "1.102.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "httpx" }, + { name = "jiter" }, + { name = "pydantic" }, + { name = "sniffio" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/07/55/da5598ed5c6bdd9939633854049cddc5cbac0da938dfcfcb3c6b119c16c0/openai-1.102.0.tar.gz", hash = "sha256:2e0153bcd64a6523071e90211cbfca1f2bbc5ceedd0993ba932a5869f93b7fc9", size = 519027, upload-time = "2025-08-26T20:50:29.397Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bd/0d/c9e7016d82c53c5b5e23e2bad36daebb8921ed44f69c0a985c6529a35106/openai-1.102.0-py3-none-any.whl", hash = "sha256:d751a7e95e222b5325306362ad02a7aa96e1fab3ed05b5888ce1c7ca63451345", size = 812015, upload-time = "2025-08-26T20:50:27.219Z" }, +] + [[package]] name = "opentelemetry-api" version = "1.35.0" @@ -1555,9 +1574,11 @@ dependencies = [ { name = "anthropic" }, { name = "chromadb" }, { name = "fastapi" }, + { name = "openai" }, { name = "python-dotenv" }, { name = "python-multipart" }, { name = "sentence-transformers" }, + { name = "tiktoken" }, { name = "uvicorn" }, ] @@ -1566,9 +1587,11 @@ requires-dist = [ { name = "anthropic", specifier = "==0.58.2" }, { name = "chromadb", specifier = "==1.0.15" }, { name = "fastapi", specifier = "==0.116.1" }, + { name = "openai", specifier = ">=1.0.0" }, { name = "python-dotenv", specifier = "==1.1.1" }, { name = "python-multipart", specifier = "==0.0.20" }, { name = "sentence-transformers", specifier = "==5.0.0" }, + { name = "tiktoken", specifier = ">=0.5.0" }, { name = "uvicorn", specifier = "==0.35.0" }, ] @@ -1602,6 +1625,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" }, ] +[[package]] +name = "tiktoken" +version = "0.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "regex" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a7/86/ad0155a37c4f310935d5ac0b1ccf9bdb635dcb906e0a9a26b616dd55825a/tiktoken-0.11.0.tar.gz", hash = "sha256:3c518641aee1c52247c2b97e74d8d07d780092af79d5911a6ab5e79359d9b06a", size = 37648, upload-time = "2025-08-08T23:58:08.495Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cc/cd/a9034bcee638716d9310443818d73c6387a6a96db93cbcb0819b77f5b206/tiktoken-0.11.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:a5f3f25ffb152ee7fec78e90a5e5ea5b03b4ea240beed03305615847f7a6ace2", size = 1055339, upload-time = "2025-08-08T23:57:51.802Z" }, + { url = "https://files.pythonhosted.org/packages/f1/91/9922b345f611b4e92581f234e64e9661e1c524875c8eadd513c4b2088472/tiktoken-0.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7dc6e9ad16a2a75b4c4be7208055a1f707c9510541d94d9cc31f7fbdc8db41d8", size = 997080, upload-time = "2025-08-08T23:57:53.442Z" }, + { url = "https://files.pythonhosted.org/packages/d0/9d/49cd047c71336bc4b4af460ac213ec1c457da67712bde59b892e84f1859f/tiktoken-0.11.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a0517634d67a8a48fd4a4ad73930c3022629a85a217d256a6e9b8b47439d1e4", size = 1128501, upload-time = "2025-08-08T23:57:54.808Z" }, + { url = "https://files.pythonhosted.org/packages/52/d5/a0dcdb40dd2ea357e83cb36258967f0ae96f5dd40c722d6e382ceee6bba9/tiktoken-0.11.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7fb4effe60574675118b73c6fbfd3b5868e5d7a1f570d6cc0d18724b09ecf318", size = 1182743, upload-time = "2025-08-08T23:57:56.307Z" }, + { url = "https://files.pythonhosted.org/packages/3b/17/a0fc51aefb66b7b5261ca1314afa83df0106b033f783f9a7bcbe8e741494/tiktoken-0.11.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:94f984c9831fd32688aef4348803b0905d4ae9c432303087bae370dc1381a2b8", size = 1244057, upload-time = "2025-08-08T23:57:57.628Z" }, + { url = "https://files.pythonhosted.org/packages/50/79/bcf350609f3a10f09fe4fc207f132085e497fdd3612f3925ab24d86a0ca0/tiktoken-0.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:2177ffda31dec4023356a441793fed82f7af5291120751dee4d696414f54db0c", size = 883901, upload-time = "2025-08-08T23:57:59.359Z" }, +] + [[package]] name = "tokenizers" version = "0.21.2"