diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml new file mode 100644 index 00000000..9e23c7d5 --- /dev/null +++ b/.github/workflows/claude-code-review.yml @@ -0,0 +1,77 @@ +name: Claude Code Review + +on: + pull_request: + types: [opened, synchronize] + # Optional: Only run on specific file changes + # paths: + # - "src/**/*.ts" + # - "src/**/*.tsx" + # - "src/**/*.js" + # - "src/**/*.jsx" + +jobs: + claude-review: + # Optional: Filter by PR author + # if: | + # github.event.pull_request.user.login == 'external-contributor' || + # github.event.pull_request.user.login == 'new-developer' || + # github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR' + + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: read + issues: read + id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Run Claude Code Review + id: claude-review + uses: anthropics/claude-code-action@beta + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + + # Optional: Specify model (defaults to Claude Sonnet 4, uncomment for Claude Opus 4.1) + # model: "claude-opus-4-1-20250805" + + # Direct prompt for automated review (no @claude mention needed) + direct_prompt: | + Please review this pull request and provide feedback on: + - Code quality and best practices + - Potential bugs or issues + - Performance considerations + - Security concerns + - Test coverage + + Be constructive and helpful in your feedback. 
+ + # Optional: Use sticky comments to make Claude reuse the same comment on subsequent pushes to the same PR + # use_sticky_comment: true + + # Optional: Customize review based on file types + # direct_prompt: | + # Review this PR focusing on: + # - For TypeScript files: Type safety and proper interface usage + # - For API endpoints: Security, input validation, and error handling + # - For React components: Performance, accessibility, and best practices + # - For tests: Coverage, edge cases, and test quality + + # Optional: Different prompts for different authors + # direct_prompt: | + # ${{ github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR' && + # 'Welcome! Please review this PR from a first-time contributor. Be encouraging and provide detailed explanations for any suggestions.' || + # 'Please provide a thorough code review focusing on our coding standards and best practices.' }} + + # Optional: Add specific tools for running tests or linting + # allowed_tools: "Bash(npm run test),Bash(npm run lint),Bash(npm run typecheck)" + + # Optional: Skip review for certain conditions + # if: | + # !contains(github.event.pull_request.title, '[skip-review]') && + # !contains(github.event.pull_request.title, '[WIP]') diff --git a/.github/workflows/claude-mentions.yml b/.github/workflows/claude-mentions.yml new file mode 100644 index 00000000..7ed6d234 --- /dev/null +++ b/.github/workflows/claude-mentions.yml @@ -0,0 +1,56 @@ +name: Claude Mentions Handler + +on: + issue_comment: + types: [created] + pull_request_review_comment: + types: [created] + issues: + types: [opened] + pull_request: + types: [opened] + +jobs: + claude-mention: + # Only run if the comment mentions @claude + if: | + (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'issues' && contains(github.event.issue.body, 
'@claude')) || + (github.event_name == 'pull_request' && contains(github.event.pull_request.body, '@claude')) + + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + issues: write + id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Respond to Claude mention + id: claude-response + uses: anthropics/claude-code-action@beta + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + + # Optional: Specify model (defaults to Claude Sonnet 4, uncomment for Claude Opus 4.1) + # model: "claude-opus-4-1-20250805" + + # Enable interactive mode for mentions + mention_mode: true + + # Optional: Allow Claude to use specific tools when responding + # allowed_tools: "Read,Grep,Bash(ls),Bash(cat)" + + # Optional: Custom system prompt for mentions + system_prompt: | + You are Claude, an AI assistant helping with code and technical questions. + When mentioned with @claude, respond helpfully to the user's question or request. + Be concise but thorough in your responses. + If asked to review code, provide constructive feedback. + If asked technical questions, provide clear explanations with examples when helpful. 
\ No newline at end of file diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml new file mode 100644 index 00000000..bc773072 --- /dev/null +++ b/.github/workflows/claude.yml @@ -0,0 +1,64 @@ +name: Claude Code + +on: + issue_comment: + types: [created] + pull_request_review_comment: + types: [created] + issues: + types: [opened, assigned] + pull_request_review: + types: [submitted] + +jobs: + claude: + if: | + (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) || + (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude'))) + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: read + issues: read + id-token: write + actions: read # Required for Claude to read CI results on PRs + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Run Claude Code + id: claude + uses: anthropics/claude-code-action@beta + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + + # This is an optional setting that allows Claude to read CI results on PRs + additional_permissions: | + actions: read + + # Optional: Specify model (defaults to Claude Sonnet 4, uncomment for Claude Opus 4.1) + # model: "claude-opus-4-1-20250805" + + # Optional: Customize the trigger phrase (default: @claude) + # trigger_phrase: "/claude" + + # Optional: Trigger when specific user is assigned to an issue + # assignee_trigger: "claude-bot" + + # Optional: Allow Claude to run specific commands + # allowed_tools: "Bash(npm install),Bash(npm run build),Bash(npm run test:*),Bash(npm run lint:*)" + + # Optional: Add custom instructions for Claude to customize its behavior for your project + # 
custom_instructions: | + # Follow our coding standards + # Ensure all new code has tests + # Use TypeScript for new files + + # Optional: Custom environment variables for Claude + # claude_env: | + # NODE_ENV: test + diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..63343f2b --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,96 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Essential Commands + +### Development Setup +```bash +# Install dependencies +uv sync + +# Add new dependencies +uv add package_name + +# Set up environment variables +cp .env.example .env +# Edit .env to add your ANTHROPIC_API_KEY +``` + +### Running the Application +```bash +# Quick start (recommended) +chmod +x run.sh +./run.sh + +# Manual start +cd backend && uv run uvicorn app:app --reload --port 8000 +``` + +### Development Commands +```bash +# Run from backend directory +cd backend && uv run uvicorn app:app --reload --port 8000 + +# Test API endpoints directly +curl http://localhost:8000/api/courses +curl -X POST http://localhost:8000/api/query -H "Content-Type: application/json" -d '{"query":"your question here"}' +``` + +## Architecture Overview + +This is a **RAG (Retrieval-Augmented Generation) System** with a three-layer architecture: + +### Core RAG Pipeline +1. **Document Processing**: Course transcripts → chunked text with metadata +2. **Vector Storage**: ChromaDB stores embeddings for semantic search +3. **AI Generation**: Claude API generates contextual responses using retrieved content +4. 
**Tool Integration**: AI can dynamically search the knowledge base using tools + +### Key Components + +**RAGSystem (`rag_system.py`)** - Central orchestrator that coordinates: +- Document processing and chunking +- Vector storage operations +- AI response generation with tool access +- Session management for conversation history + +**Tool-Based Search Architecture** - The system uses a tool-based approach where: +- `ToolManager` registers and manages available tools +- `CourseSearchTool` performs content searches within course materials +- `CourseOutlineTool` retrieves complete course structures with lesson lists +- Claude API calls tools dynamically during response generation +- Tools return sources that are tracked and returned to frontend with links + +**Data Models** (`models.py`): +- `Course`: Contains title, instructor, lessons list +- `CourseChunk`: Text chunks with course/lesson metadata for vector storage +- `Lesson`: Individual lessons with titles and optional links + +### Configuration (`config.py`) +Key settings: +- `CHUNK_SIZE: 800` - Text chunk size for vector storage +- `CHUNK_OVERLAP: 100` - Overlap between chunks +- `MAX_RESULTS: 5` - Vector search result limit +- `MAX_HISTORY: 2` - Conversation memory depth + +### Data Flow +1. Course documents in `docs/` are processed into `CourseChunk` objects +2. Chunks are embedded and stored in ChromaDB (`./chroma_db/`) +3. User queries trigger tool-based searches via Claude API +4. Retrieved chunks provide context for AI response generation +5. 
Session history maintains conversation continuity + +### Frontend Integration +- FastAPI serves both API endpoints (`/api/*`) and static frontend files +- Frontend communicates via `/api/query` for chat and `/api/courses` for statistics +- CORS configured for development with live reload support + +## Environment Requirements + +Required environment variable: +``` +ANTHROPIC_API_KEY=your_anthropic_api_key_here +``` + +The system expects course documents in `docs/` folder as `.txt`, `.pdf`, or `.docx` files. diff --git a/backend/ai_generator.py b/backend/ai_generator.py index 0363ca90..b08b2196 100644 --- a/backend/ai_generator.py +++ b/backend/ai_generator.py @@ -1,25 +1,73 @@ import anthropic -from typing import List, Optional, Dict, Any +from typing import List, Optional, Dict, Any, Tuple + +class ToolCallState: + """Tracks the state of tool calls across multiple rounds""" + + def __init__(self, max_rounds: int = 2): + self.max_rounds = max_rounds + self.current_round = 0 + self.tool_calls_made = [] + + def can_make_more_calls(self) -> bool: + """Check if more tool calls can be made""" + return self.current_round < self.max_rounds + + def increment_round(self): + """Increment the current round counter""" + self.current_round += 1 + + def add_tool_call(self, tool_name: str, params: Dict[str, Any], result: str): + """Record a tool call that was made""" + self.tool_calls_made.append({ + 'round': self.current_round, + 'tool': tool_name, + 'params': params, + 'result_length': len(result) if result else 0 + }) class AIGenerator: """Handles interactions with Anthropic's Claude API for generating responses""" # Static system prompt to avoid rebuilding on each call - SYSTEM_PROMPT = """ You are an AI assistant specialized in course materials and educational content with access to a comprehensive search tool for course information. 
+ SYSTEM_PROMPT = """ You are an AI assistant specialized in course materials and educational content with access to comprehensive tools for course information. + +Available Tools: +1. **get_course_outline**: Retrieves complete course structure with title, link, and all lessons + - Use for: course outlines, syllabus queries, lesson lists, course structure questions + - Returns: Course title, course link, and numbered lesson list + +2. **search_course_content**: Searches within course materials for specific content + - Use for: detailed content questions, specific topics, lesson details + - Returns: Relevant content excerpts from course materials + +Tool Usage Guidelines: +- **Course outline/structure questions**: Use get_course_outline tool +- **Specific content questions**: Use search_course_content tool +- **Sequential tool usage**: You may use tools up to 2 times in sequence to gather comprehensive information +- **First tool call**: Use to get initial information (e.g., course outline, basic search) +- **Second tool call**: Use to refine search based on first results or explore related topics +- **Complex queries**: Break down multi-part questions using sequential tool calls +- Synthesize tool results into accurate, fact-based responses +- If tools yield no results, state this clearly without offering alternatives -Search Tool Usage: -- Use the search tool **only** for questions about specific course content or detailed educational materials -- **One search per query maximum** -- Synthesize search results into accurate, fact-based responses -- If search yields no results, state this clearly without offering alternatives +Examples of multi-step tool usage: +- To find courses with similar topics to a specific lesson: First get the course outline to identify the lesson, then search for that topic +- To compare course structures: Get outline of first course, then get outline of second course +- To find detailed content after overview: First search broadly, then 
search for specific details Response Protocol: -- **General knowledge questions**: Answer using existing knowledge without searching -- **Course-specific questions**: Search first, then answer +- **General knowledge questions**: Answer using existing knowledge without tools +- **Course-specific questions**: Use appropriate tool first, then answer - **No meta-commentary**: - - Provide direct answers only — no reasoning process, search explanations, or question-type analysis - - Do not mention "based on the search results" + - Provide direct answers only — no reasoning process, tool explanations, or question-type analysis + - Do not mention "based on the search results" or "using the outline tool" +When presenting course outlines: +- Display the course title prominently +- Include the course link if available +- List all lessons with their numbers and titles +- Keep formatting clean and readable All responses must be: 1. **Brief, Concise and focused** - Get to the point quickly @@ -43,15 +91,17 @@ def __init__(self, api_key: str, model: str): def generate_response(self, query: str, conversation_history: Optional[str] = None, tools: Optional[List] = None, - tool_manager=None) -> str: + tool_manager=None, + max_tool_rounds: int = 2) -> str: """ - Generate AI response with optional tool usage and conversation context. + Generate AI response with optional sequential tool usage and conversation context. 
Args: query: The user's question or request conversation_history: Previous messages for context tools: Available tools the AI can use tool_manager: Manager to execute tools + max_tool_rounds: Maximum number of sequential tool calls allowed Returns: Generated response as string @@ -64,70 +114,116 @@ def generate_response(self, query: str, else self.SYSTEM_PROMPT ) - # Prepare API call parameters efficiently - api_params = { - **self.base_params, - "messages": [{"role": "user", "content": query}], - "system": system_content - } - - # Add tools if available - if tools: - api_params["tools"] = tools - api_params["tool_choice"] = {"type": "auto"} + # Initialize message history and tool state + messages = [{"role": "user", "content": query}] + tool_state = ToolCallState(max_rounds=max_tool_rounds) - # Get response from Claude - response = self.client.messages.create(**api_params) - - # Handle tool execution if needed - if response.stop_reason == "tool_use" and tool_manager: - return self._handle_tool_execution(response, api_params, tool_manager) + # Process tool calls iteratively + while tool_state.can_make_more_calls(): + # Prepare API call parameters with tools available + api_params = { + **self.base_params, + "messages": messages, + "system": system_content + } + + # Add tools if available and we can still make tool calls + if tools and tool_manager: + api_params["tools"] = tools + api_params["tool_choice"] = {"type": "auto"} + + # Get response from Claude + response = self.client.messages.create(**api_params) + + # Check if Claude wants to use tools + if response.stop_reason == "tool_use": + # Execute tools and update messages + messages, tools_executed = self._execute_tool_round( + response, messages, tool_manager, tool_state + ) + + # If no tools were executed (error case), break + if not tools_executed: + break + + # Increment round counter + tool_state.increment_round() + else: + # Claude doesn't want to use tools, return the response + return 
response.content[0].text - # Return direct response - return response.content[0].text + # Max rounds reached or no more tool calls needed - make final call without tools + return self._make_final_response(messages, system_content) - def _handle_tool_execution(self, initial_response, base_params: Dict[str, Any], tool_manager): + def _execute_tool_round(self, response, messages: List[Dict], tool_manager, tool_state: ToolCallState) -> Tuple[List[Dict], bool]: """ - Handle execution of tool calls and get follow-up response. + Execute a round of tool calls and update message history. Args: - initial_response: The response containing tool use requests - base_params: Base API parameters + response: The response containing tool use requests + messages: Current message history tool_manager: Manager to execute tools + tool_state: State tracker for tool calls Returns: - Final response text after tool execution + Tuple of (updated messages, whether any tools were executed) """ - # Start with existing messages - messages = base_params["messages"].copy() + # Create a copy of messages to avoid mutation + updated_messages = messages.copy() # Add AI's tool use response - messages.append({"role": "assistant", "content": initial_response.content}) + updated_messages.append({"role": "assistant", "content": response.content}) # Execute all tool calls and collect results tool_results = [] - for content_block in initial_response.content: + tools_executed = False + + for content_block in response.content: if content_block.type == "tool_use": + # Execute the tool tool_result = tool_manager.execute_tool( content_block.name, **content_block.input ) + # Track the tool call + tool_state.add_tool_call( + content_block.name, + content_block.input, + tool_result + ) + + # Add to results tool_results.append({ "type": "tool_result", "tool_use_id": content_block.id, "content": tool_result }) + + tools_executed = True # Add tool results as single message if tool_results: - messages.append({"role": 
"user", "content": tool_results}) + updated_messages.append({"role": "user", "content": tool_results}) - # Prepare final API call without tools + return updated_messages, tools_executed + + def _make_final_response(self, messages: List[Dict], system_content: str) -> str: + """ + Make a final API call without tools to generate the synthesis response. + + Args: + messages: Complete message history including tool results + system_content: System prompt content + + Returns: + Final response text + """ + # Prepare final API call WITHOUT tools final_params = { **self.base_params, "messages": messages, - "system": base_params["system"] + "system": system_content } # Get final response diff --git a/backend/app.py b/backend/app.py index 5a69d741..85545697 100644 --- a/backend/app.py +++ b/backend/app.py @@ -6,7 +6,7 @@ from fastapi.staticfiles import StaticFiles from fastapi.middleware.trustedhost import TrustedHostMiddleware from pydantic import BaseModel -from typing import List, Optional +from typing import List, Optional, Dict, Any import os from config import config @@ -43,7 +43,7 @@ class QueryRequest(BaseModel): class QueryResponse(BaseModel): """Response model for course queries""" answer: str - sources: List[str] + sources: List[Dict[str, Any]] # Changed to support sources with links session_id: str class CourseStats(BaseModel): @@ -85,6 +85,18 @@ async def get_course_stats(): except Exception as e: raise HTTPException(status_code=500, detail=str(e)) +@app.post("/api/session/clear") +async def clear_session(session_id: Optional[str] = None): + """Clear a session's conversation history""" + try: + if session_id: + rag_system.session_manager.clear_session(session_id) + # Always create and return a new session + new_session_id = rag_system.session_manager.create_session() + return {"session_id": new_session_id, "status": "success"} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + @app.on_event("startup") async def startup_event(): 
"""Load initial documents on startup""" diff --git a/backend/rag_system.py b/backend/rag_system.py index 50d848c8..1a79eb74 100644 --- a/backend/rag_system.py +++ b/backend/rag_system.py @@ -4,7 +4,7 @@ from vector_store import VectorStore from ai_generator import AIGenerator from session_manager import SessionManager -from search_tools import ToolManager, CourseSearchTool +from search_tools import ToolManager, CourseSearchTool, CourseOutlineTool from models import Course, Lesson, CourseChunk class RAGSystem: @@ -22,7 +22,9 @@ def __init__(self, config): # Initialize search tools self.tool_manager = ToolManager() self.search_tool = CourseSearchTool(self.vector_store) + self.outline_tool = CourseOutlineTool(self.vector_store) self.tool_manager.register_tool(self.search_tool) + self.tool_manager.register_tool(self.outline_tool) def add_course_document(self, file_path: str) -> Tuple[Course, int]: """ diff --git a/backend/search_tools.py b/backend/search_tools.py index adfe8235..3cc44cef 100644 --- a/backend/search_tools.py +++ b/backend/search_tools.py @@ -88,7 +88,7 @@ def execute(self, query: str, course_name: Optional[str] = None, lesson_number: def _format_results(self, results: SearchResults) -> str: """Format search results with course and lesson context""" formatted = [] - sources = [] # Track sources for the UI + sources = [] # Track sources for the UI with links for doc, meta in zip(results.documents, results.metadata): course_title = meta.get('course_title', 'unknown') @@ -100,11 +100,22 @@ def _format_results(self, results: SearchResults) -> str: header += f" - Lesson {lesson_num}" header += "]" - # Track source for the UI - source = course_title + # Track source for the UI with link + source_text = course_title if lesson_num is not None: - source += f" - Lesson {lesson_num}" - sources.append(source) + source_text += f" - Lesson {lesson_num}" + + # Get lesson link from vector store + lesson_link = None + if lesson_num is not None: + lesson_link = 
self.store.get_lesson_link(course_title, lesson_num) + + # Create source dictionary with text and optional link + source_data = { + "text": source_text, + "link": lesson_link + } + sources.append(source_data) formatted.append(f"{header}\n{doc}") @@ -113,6 +124,88 @@ def _format_results(self, results: SearchResults) -> str: return "\n\n".join(formatted) +class CourseOutlineTool(Tool): + """Tool for retrieving complete course outlines with lessons""" + + def __init__(self, vector_store: VectorStore): + self.store = vector_store + self.last_sources = [] # Track sources from last search + + def get_tool_definition(self) -> Dict[str, Any]: + """Return Anthropic tool definition for this tool""" + return { + "name": "get_course_outline", + "description": "Get complete course outline including title, link, and all lessons with their numbers and titles", + "input_schema": { + "type": "object", + "properties": { + "course_title": { + "type": "string", + "description": "Course title to get outline for (partial matches work, e.g. 'MCP', 'Computer Use')" + } + }, + "required": ["course_title"] + } + } + + def execute(self, course_title: str) -> str: + """ + Execute the course outline tool. + + Args: + course_title: Course name to get outline for + + Returns: + Formatted course outline or error message + """ + # Get course outline from vector store + outline = self.store.get_course_outline(course_title) + + # Handle not found + if not outline: + return f"No course found matching '{course_title}'. Please check the course name and try again." 
+ + # Format the response + formatted = [] + sources = [] + + # Add course title and link + formatted.append(f"**Course Title:** {outline['title']}") + + if outline.get('course_link'): + formatted.append(f"**Course Link:** {outline['course_link']}") + # Track source with link + sources.append({ + "text": outline['title'], + "link": outline['course_link'] + }) + else: + # Track source without link + sources.append({ + "text": outline['title'], + "link": None + }) + + if outline.get('instructor'): + formatted.append(f"**Instructor:** {outline['instructor']}") + + # Add lessons + formatted.append(f"\n**Lessons ({outline.get('lesson_count', 0)} total):**") + + if outline.get('lessons'): + for lesson in outline['lessons']: + lesson_num = lesson.get('lesson_number', '?') + lesson_title = lesson.get('lesson_title', 'Unknown') + formatted.append(f" Lesson {lesson_num}: {lesson_title}") + else: + formatted.append(" No lessons found") + + # Store sources for retrieval + self.last_sources = sources + + return "\n".join(formatted) + + class ToolManager: """Manages available tools for the AI""" diff --git a/backend/tests/test_ai_generator.py b/backend/tests/test_ai_generator.py new file mode 100644 index 00000000..248172c0 --- /dev/null +++ b/backend/tests/test_ai_generator.py @@ -0,0 +1,485 @@ +import unittest +from unittest.mock import Mock, MagicMock, patch, call +import sys +import os + +# Add parent directory to path for imports +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from ai_generator import AIGenerator, ToolCallState + + +class TestToolCallState(unittest.TestCase): + """Test the ToolCallState class""" + + def test_initial_state(self): + """Test initial state of ToolCallState""" + state = ToolCallState(max_rounds=2) + self.assertEqual(state.max_rounds, 2) + self.assertEqual(state.current_round, 0) + self.assertTrue(state.can_make_more_calls()) + self.assertEqual(len(state.tool_calls_made), 0) + + def test_increment_round(self): 
+ """Test round incrementing""" + state = ToolCallState(max_rounds=2) + state.increment_round() + self.assertEqual(state.current_round, 1) + self.assertTrue(state.can_make_more_calls()) + + state.increment_round() + self.assertEqual(state.current_round, 2) + self.assertFalse(state.can_make_more_calls()) + + def test_add_tool_call(self): + """Test adding tool call records""" + state = ToolCallState() + state.add_tool_call("search_tool", {"query": "test"}, "result text") + + self.assertEqual(len(state.tool_calls_made), 1) + self.assertEqual(state.tool_calls_made[0]['tool'], "search_tool") + self.assertEqual(state.tool_calls_made[0]['params'], {"query": "test"}) + self.assertEqual(state.tool_calls_made[0]['result_length'], 11) + + +class TestAIGenerator(unittest.TestCase): + """Test the AIGenerator class with sequential tool calling""" + + def setUp(self): + """Set up test fixtures""" + self.api_key = "test-api-key" + self.model = "claude-3-opus-20240229" + + # Create generator with mocked client + with patch('ai_generator.anthropic.Anthropic'): + self.generator = AIGenerator(self.api_key, self.model) + self.mock_client = Mock() + self.generator.client = self.mock_client + + def test_no_tools_needed(self): + """Test direct response when no tools are needed""" + # Mock response without tool use + mock_response = Mock() + mock_response.stop_reason = "end_turn" + mock_response.content = [Mock(text="Direct answer to question")] + + self.mock_client.messages.create.return_value = mock_response + + # Call generate_response + result = self.generator.generate_response( + query="What is Python?", + tools=None, + tool_manager=None + ) + + # Verify result + self.assertEqual(result, "Direct answer to question") + + # Verify only one API call was made + self.assertEqual(self.mock_client.messages.create.call_count, 1) + + def test_single_tool_call(self): + """Test backward compatibility with single tool call""" + # Mock tool manager + mock_tool_manager = Mock() + 
mock_tool_manager.execute_tool.return_value = "Tool result content" + + # Mock tools + mock_tools = [{"name": "search_tool", "description": "Search tool"}] + + # Mock first response with tool use + mock_response1 = Mock() + mock_response1.stop_reason = "tool_use" + mock_tool_block = Mock() + mock_tool_block.type = "tool_use" + mock_tool_block.name = "search_tool" + mock_tool_block.input = {"query": "test query"} + mock_tool_block.id = "tool_123" + mock_response1.content = [mock_tool_block] + + # Mock second response after tool execution (no more tools needed) + mock_response2 = Mock() + mock_response2.stop_reason = "end_turn" + mock_response2.content = [Mock(text="Final answer based on tool results")] + + # Mock third response (would be final synthesis if Claude used another tool) + mock_response3 = Mock() + mock_response3.stop_reason = "end_turn" + mock_response3.content = [Mock(text="Should not reach here")] + + # Set up mock to return different responses + self.mock_client.messages.create.side_effect = [mock_response1, mock_response2, mock_response3] + + # Call generate_response + result = self.generator.generate_response( + query="Search for Python tutorials", + tools=mock_tools, + tool_manager=mock_tool_manager + ) + + # Verify result + self.assertEqual(result, "Final answer based on tool results") + + # Verify tool was executed + mock_tool_manager.execute_tool.assert_called_once_with( + "search_tool", query="test query" + ) + + # Verify only two API calls were made (not three) + self.assertEqual(self.mock_client.messages.create.call_count, 2) + + # Verify second call DOES include tools (can still make another tool call) + second_call_args = self.mock_client.messages.create.call_args_list[1][1] + self.assertIn("tools", second_call_args) + + # Verify Claude chose not to use more tools (stop_reason was end_turn) + + def test_sequential_tool_calls(self): + """Test sequential tool calling with two rounds""" + # Mock tool manager + mock_tool_manager = Mock() + 
mock_tool_manager.execute_tool.side_effect = [ + "First tool result", + "Second tool result" + ] + + # Mock tools + mock_tools = [ + {"name": "get_outline", "description": "Get course outline"}, + {"name": "search_content", "description": "Search content"} + ] + + # Mock first response - first tool call + mock_response1 = Mock() + mock_response1.stop_reason = "tool_use" + mock_tool1 = Mock() + mock_tool1.type = "tool_use" + mock_tool1.name = "get_outline" + mock_tool1.input = {"course": "Python"} + mock_tool1.id = "tool_1" + mock_response1.content = [mock_tool1] + + # Mock second response - second tool call based on first results + mock_response2 = Mock() + mock_response2.stop_reason = "tool_use" + mock_tool2 = Mock() + mock_tool2.type = "tool_use" + mock_tool2.name = "search_content" + mock_tool2.input = {"query": "lesson 4 topic"} + mock_tool2.id = "tool_2" + mock_response2.content = [mock_tool2] + + # Mock final response after all tools + mock_response3 = Mock() + mock_response3.stop_reason = "end_turn" + mock_response3.content = [Mock(text="Final comprehensive answer")] + + # Set up mock to return different responses + self.mock_client.messages.create.side_effect = [ + mock_response1, mock_response2, mock_response3 + ] + + # Call generate_response + result = self.generator.generate_response( + query="Find courses similar to lesson 4 of Python course", + tools=mock_tools, + tool_manager=mock_tool_manager, + max_tool_rounds=2 + ) + + # Verify result + self.assertEqual(result, "Final comprehensive answer") + + # Verify both tools were executed in sequence + self.assertEqual(mock_tool_manager.execute_tool.call_count, 2) + mock_tool_manager.execute_tool.assert_any_call("get_outline", course="Python") + mock_tool_manager.execute_tool.assert_any_call("search_content", query="lesson 4 topic") + + # Verify three API calls were made + self.assertEqual(self.mock_client.messages.create.call_count, 3) + + # Verify first two calls included tools + first_call_args = 
self.mock_client.messages.create.call_args_list[0][1] + self.assertIn("tools", first_call_args) + + second_call_args = self.mock_client.messages.create.call_args_list[1][1] + self.assertIn("tools", second_call_args) + + # Verify final call did NOT include tools + third_call_args = self.mock_client.messages.create.call_args_list[2][1] + self.assertNotIn("tools", third_call_args) + + def test_max_rounds_reached(self): + """Test that tool calling stops after max rounds""" + # Mock tool manager + mock_tool_manager = Mock() + mock_tool_manager.execute_tool.return_value = "Tool result" + + # Mock tools + mock_tools = [{"name": "search_tool", "description": "Search tool"}] + + # Create responses that always want to use tools + mock_tool_response = Mock() + mock_tool_response.stop_reason = "tool_use" + mock_tool = Mock() + mock_tool.type = "tool_use" + mock_tool.name = "search_tool" + mock_tool.input = {"query": "test"} + mock_tool.id = "tool_id" + mock_tool_response.content = [mock_tool] + + # Final response without tools + mock_final_response = Mock() + mock_final_response.stop_reason = "end_turn" + mock_final_response.content = [Mock(text="Final answer after max rounds")] + + # Set up mock to return tool responses then final + self.mock_client.messages.create.side_effect = [ + mock_tool_response, # Round 1 + mock_tool_response, # Round 2 + mock_final_response # Final synthesis + ] + + # Call with max_tool_rounds=2 + result = self.generator.generate_response( + query="Complex query", + tools=mock_tools, + tool_manager=mock_tool_manager, + max_tool_rounds=2 + ) + + # Verify result + self.assertEqual(result, "Final answer after max rounds") + + # Verify exactly 2 tool executions + self.assertEqual(mock_tool_manager.execute_tool.call_count, 2) + + # Verify exactly 3 API calls (2 with tools, 1 without) + self.assertEqual(self.mock_client.messages.create.call_count, 3) + + def test_message_accumulation(self): + """Test that messages accumulate correctly across rounds""" + # 
Mock tool manager + mock_tool_manager = Mock() + mock_tool_manager.execute_tool.return_value = "Tool result" + + # Mock tools + mock_tools = [{"name": "test_tool", "description": "Test tool"}] + + # Mock responses + mock_response1 = Mock() + mock_response1.stop_reason = "tool_use" + mock_tool = Mock() + mock_tool.type = "tool_use" + mock_tool.name = "test_tool" + mock_tool.input = {} + mock_tool.id = "tool_1" + mock_response1.content = [mock_tool] + + mock_response2 = Mock() + mock_response2.stop_reason = "end_turn" + mock_response2.content = [Mock(text="Final")] + + self.mock_client.messages.create.side_effect = [mock_response1, mock_response2] + + # Call generate_response + result = self.generator.generate_response( + query="Test query", + tools=mock_tools, + tool_manager=mock_tool_manager + ) + + # Check that second API call has accumulated messages + second_call_args = self.mock_client.messages.create.call_args_list[1][1] + messages = second_call_args["messages"] + + # Should have: user query, assistant tool call, user tool result + self.assertEqual(len(messages), 3) + self.assertEqual(messages[0]["role"], "user") + self.assertEqual(messages[1]["role"], "assistant") + self.assertEqual(messages[2]["role"], "user") + + # Verify tool result is in the messages + self.assertIsInstance(messages[2]["content"], list) + self.assertEqual(messages[2]["content"][0]["type"], "tool_result") + + def test_error_handling_no_tools_executed(self): + """Test handling when tool execution fails""" + # Mock tool manager that raises an exception + mock_tool_manager = Mock() + mock_tool_manager.execute_tool.side_effect = Exception("Tool error") + + # Mock tools + mock_tools = [{"name": "failing_tool", "description": "Failing tool"}] + + # Mock response with tool use + mock_response1 = Mock() + mock_response1.stop_reason = "tool_use" + mock_tool = Mock() + mock_tool.type = "tool_use" + mock_tool.name = "failing_tool" + mock_tool.input = {} + mock_tool.id = "tool_1" + 
mock_response1.content = [mock_tool] + + # Mock final response + mock_response2 = Mock() + mock_response2.stop_reason = "end_turn" + mock_response2.content = [Mock(text="Fallback response")] + + self.mock_client.messages.create.side_effect = [mock_response1, mock_response2] + + # Call should handle the error gracefully + with self.assertRaises(Exception): + result = self.generator.generate_response( + query="Test query", + tools=mock_tools, + tool_manager=mock_tool_manager + ) + + def test_conversation_history_preserved(self): + """Test that conversation history is included in all API calls""" + history = "User: Previous question\nAssistant: Previous answer" + + # Mock response without tools + mock_response = Mock() + mock_response.stop_reason = "end_turn" + mock_response.content = [Mock(text="Answer")] + + self.mock_client.messages.create.return_value = mock_response + + # Call with conversation history + result = self.generator.generate_response( + query="New question", + conversation_history=history + ) + + # Verify system prompt includes history + call_args = self.mock_client.messages.create.call_args[1] + self.assertIn(history, call_args["system"]) + + +class TestComplexScenarios(unittest.TestCase): + """Test complex real-world scenarios""" + + def setUp(self): + """Set up test fixtures""" + with patch('ai_generator.anthropic.Anthropic'): + self.generator = AIGenerator("test-key", "test-model") + self.mock_client = Mock() + self.generator.client = self.mock_client + + def test_course_comparison_scenario(self): + """Test comparing two courses using sequential tool calls""" + # Mock tool manager + mock_tool_manager = Mock() + mock_tool_manager.execute_tool.side_effect = [ + "MCP Course Outline: Lesson 1: Intro, Lesson 2: Basics...", + "Computer Use Course Outline: Lesson 1: Setup, Lesson 2: Navigation..." 
+ ] + + # Mock tools + mock_tools = [{"name": "get_course_outline", "description": "Get course outline"}] + + # Mock responses for two sequential outline calls + mock_response1 = Mock() + mock_response1.stop_reason = "tool_use" + tool1 = Mock() + tool1.type = "tool_use" + tool1.name = "get_course_outline" + tool1.input = {"course_title": "MCP"} + tool1.id = "tool_1" + mock_response1.content = [tool1] + + mock_response2 = Mock() + mock_response2.stop_reason = "tool_use" + tool2 = Mock() + tool2.type = "tool_use" + tool2.name = "get_course_outline" + tool2.input = {"course_title": "Computer Use"} + tool2.id = "tool_2" + mock_response2.content = [tool2] + + mock_final = Mock() + mock_final.stop_reason = "end_turn" + mock_final.content = [Mock(text="Both courses cover similar introductory topics...")] + + self.mock_client.messages.create.side_effect = [ + mock_response1, mock_response2, mock_final + ] + + # Execute the comparison + result = self.generator.generate_response( + query="How does the MCP introduction compare to the Computer Use course structure?", + tools=mock_tools, + tool_manager=mock_tool_manager + ) + + # Verify both outlines were retrieved + self.assertEqual(mock_tool_manager.execute_tool.call_count, 2) + + # Verify final synthesis + self.assertIn("similar introductory topics", result) + + def test_find_specific_then_search_scenario(self): + """Test finding specific lesson then searching for related content""" + # Mock tool manager + mock_tool_manager = Mock() + mock_tool_manager.execute_tool.side_effect = [ + "Course: Python Basics\nLesson 4: Object-Oriented Programming", + "Found 3 courses discussing OOP: Java Advanced, C++ Fundamentals, Ruby Design" + ] + + # Mock tools + mock_tools = [ + {"name": "get_course_outline", "description": "Get outline"}, + {"name": "search_course_content", "description": "Search content"} + ] + + # First call gets outline + mock_response1 = Mock() + mock_response1.stop_reason = "tool_use" + tool1 = Mock() + tool1.type = 
def test_sequential_tool_execution():
    """Test that the system can handle sequential tool calls properly"""

    tool_manager = ToolManager()

    def stub_tool(definition, result):
        # Minimal stand-in exposing the two methods ToolManager relies on.
        tool = Mock()
        tool.get_tool_definition.return_value = definition
        tool.execute.return_value = result
        return tool

    outline_tool = stub_tool(
        {
            "name": "get_course_outline",
            "description": "Get course outline",
            "input_schema": {
                "type": "object",
                "properties": {"course_title": {"type": "string"}},
                "required": ["course_title"],
            },
        },
        """
    Course Title: Python Basics
    Lessons:
    Lesson 1: Introduction
    Lesson 2: Variables and Types
    Lesson 3: Control Flow
    Lesson 4: Object-Oriented Programming
    Lesson 5: Advanced Topics
    """,
    )

    search_tool = stub_tool(
        {
            "name": "search_course_content",
            "description": "Search course content",
            "input_schema": {
                "type": "object",
                "properties": {"query": {"type": "string"}},
                "required": ["query"],
            },
        },
        """
    Found 3 courses discussing Object-Oriented Programming:
    - Java Advanced: Full OOP implementation
    - C++ Fundamentals: Classes and inheritance
    - Ruby Design Patterns: OOP best practices
    """,
    )

    # Register the stub tools directly on the manager.
    tool_manager.tools = {
        "get_course_outline": outline_tool,
        "search_course_content": search_tool,
    }

    generator = AIGenerator("test-key", "claude-3-opus-20240229")
    mock_client = Mock()
    generator.client = mock_client

    def tool_use_response(name, tool_input, tool_id):
        # Mock API response requesting one tool invocation.
        call = Mock()
        call.type = "tool_use"
        call.name = name
        call.input = tool_input
        call.id = tool_id
        response = Mock()
        response.stop_reason = "tool_use"
        response.content = [call]
        return response

    final = Mock()
    final.stop_reason = "end_turn"
    final.content = [Mock(text=(
        "Based on lesson 4 of Python Basics (Object-Oriented Programming), "
        "I found similar content in Java Advanced, C++ Fundamentals, and "
        "Ruby Design Patterns courses."
    ))]

    # Outline lookup, then content search, then the final synthesis.
    mock_client.messages.create.side_effect = [
        tool_use_response("get_course_outline", {"course_title": "Python Basics"}, "tool_1"),
        tool_use_response("search_course_content", {"query": "Object-Oriented Programming"}, "tool_2"),
        final,
    ]

    query = "Find courses that cover similar topics to lesson 4 of Python Basics"
    result = generator.generate_response(
        query=query,
        tools=tool_manager.get_tool_definitions(),
        tool_manager=tool_manager,
        max_tool_rounds=2,
    )

    print("Query:", query)
    print("Result:", result)

    assert outline_tool.execute.called, "Outline tool should have been called"
    assert search_tool.execute.called, "Search tool should have been called"

    # The sequence: outline for Python Basics, then an OOP content search.
    assert outline_tool.execute.call_args[1]["course_title"] == "Python Basics"
    assert search_tool.execute.call_args[1]["query"] == "Object-Oriented Programming"

    assert mock_client.messages.create.call_count == 3, \
        f"Expected 3 API calls, got {mock_client.messages.create.call_count}"

    print("\n[PASS] Integration test passed! Sequential tool calling is working correctly.")
    print(f"   - Made {mock_client.messages.create.call_count} API calls")
    print(f"   - Executed {outline_tool.execute.call_count + search_tool.execute.call_count} tool calls")

    return True


def test_single_tool_still_works():
    """Ensure backward compatibility - single tool calls still work"""

    tool_manager = ToolManager()

    search_tool = Mock()
    search_tool.get_tool_definition.return_value = {
        "name": "search_course_content",
        "description": "Search course content",
        "input_schema": {
            "type": "object",
            "properties": {"query": {"type": "string"}},
            "required": ["query"],
        },
    }
    search_tool.execute.return_value = "Found 5 courses about Python"
    tool_manager.tools = {"search_course_content": search_tool}

    generator = AIGenerator("test-key", "claude-3-opus-20240229")
    mock_client = Mock()
    generator.client = mock_client

    # One tool call, then the model finishes.
    tool_call = Mock()
    tool_call.type = "tool_use"
    tool_call.name = "search_course_content"
    tool_call.input = {"query": "Python"}
    tool_call.id = "tool_1"
    first = Mock()
    first.stop_reason = "tool_use"
    first.content = [tool_call]

    done = Mock()
    done.stop_reason = "end_turn"
    done.content = [Mock(text="I found 5 courses about Python.")]

    mock_client.messages.create.side_effect = [first, done]

    result = generator.generate_response(
        query="Search for Python courses",
        tools=tool_manager.get_tool_definitions(),
        tool_manager=tool_manager,
    )

    print("\n[PASS] Backward compatibility test passed! Single tool calls still work.")
    print(f"   Result: {result}")

    return True
Single tool calls still work.") + print(f" Result: {result}") + + return True + + +if __name__ == "__main__": + print("Running integration tests for sequential tool calling...\n") + + try: + test_sequential_tool_execution() + test_single_tool_still_works() + print("\n[SUCCESS] All integration tests passed!") + except AssertionError as e: + print(f"\n[FAIL] Test failed: {e}") + sys.exit(1) + except Exception as e: + print(f"\n[ERROR] Unexpected error: {e}") + import traceback + traceback.print_exc() + sys.exit(1) \ No newline at end of file diff --git a/backend/vector_store.py b/backend/vector_store.py index 390abe71..8764b792 100644 --- a/backend/vector_store.py +++ b/backend/vector_store.py @@ -264,4 +264,46 @@ def get_lesson_link(self, course_title: str, lesson_number: int) -> Optional[str return None except Exception as e: print(f"Error getting lesson link: {e}") + + def get_course_outline(self, course_name: str) -> Optional[Dict[str, Any]]: + """ + Get complete course outline including title, link, and all lessons. 
+ + Args: + course_name: Course name to search for (partial matches work) + + Returns: + Dictionary with course outline data or None if not found + """ + import json + + # First resolve the course name to get exact title + course_title = self._resolve_course_name(course_name) + if not course_title: + return None + + try: + # Get course metadata by ID (title is the ID) + results = self.course_catalog.get(ids=[course_title]) + if results and 'metadatas' in results and results['metadatas']: + metadata = results['metadatas'][0] + + # Parse lessons JSON + lessons = [] + lessons_json = metadata.get('lessons_json') + if lessons_json: + lessons = json.loads(lessons_json) + + # Return structured course outline + return { + 'title': metadata.get('title'), + 'course_link': metadata.get('course_link'), + 'instructor': metadata.get('instructor'), + 'lesson_count': metadata.get('lesson_count', 0), + 'lessons': lessons + } + return None + except Exception as e: + print(f"Error getting course outline: {e}") + return None \ No newline at end of file diff --git a/frontend/index.html b/frontend/index.html index f8e25a62..b11a4d92 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -12,13 +12,38 @@
Ask questions about courses, instructors, and content
+Ask questions about courses, instructors, and content
+