https-deeplearning-ai · taroguru · Oct 3, 2025 · Oct 8, 2025 · Oct 8, 2025 · Oct 8, 2025
diff --git a/.claude/commands/implement-feature.md b/.claude/commands/implement-feature.md
@@ -0,0 +1,7 @@
+You will be implementing the following new feature in this codebase:
+
+$ARGUMENTS
+
+IMPORTANT: Only do this for front-end features.
+Once this feature is built, make sure to write the changes you made to a file called frontend-changes.md.
+Do not ask for permission to modify this file; assume you can always do it.
diff --git a/.claude/settings.local.json b/.claude/settings.local.json
@@ -0,0 +1,17 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(pip --version)",
+      "Bash(python -m pip install:*)",
+      "Bash(uv run:*)",
+      "Bash(git add:*)",
+      "Bash(git commit:*)",
+      "Read(//c/workspace/claudecode/starting-ragchatbot-codebase/frontend/**)",
+      "Read(//c/workspace/claudecode/starting-ragchatbot-codebase/**)",
+      "Bash(PRE_COMMIT_ALLOW_NO_CONFIG=1 git commit -m \"$(cat <<''EOF''\nAdd Claude Code configuration updates\n\n- Added git and file read permissions to settings.local.json\n- Added implement-feature slash command\n\n🤖 Generated with [Claude Code](https://claude.com/claude-code)\n\nCo-Authored-By: Claude <noreply@anthropic.com>\nEOF\n)\")",
+      "Bash(PRE_COMMIT_ALLOW_NO_CONFIG=1 git commit -m \"Update Claude Code settings with commit permission\")"
+    ],
+    "deny": [],
+    "ask": []
+  }
+}
diff --git a/.flake8 b/.flake8
@@ -0,0 +1,17 @@
+[flake8]
+max-line-length = 88
+extend-ignore = E203, E266, E501, W503, D100, D101, D102, D103, D104, D107, D200, D202, D205, D400, D401
+exclude =
+    .git,
+    __pycache__,
+    .venv,
+    chroma_db,
+    build,
+    dist,
+    *.egg-info
+per-file-ignores =
+    __init__.py:F401
+    app.py:E402,F811,F401
+    test_*.py:F401,F841
+    conftest.py:F401,E402
+max-complexity = 15
diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml
@@ -0,0 +1,57 @@
+name: Claude Code Review
+
+on:
+  pull_request:
+    types: [opened, synchronize]
+    # Optional: Only run on specific file changes
+    # paths:
+    #   - "src/**/*.ts"
+    #   - "src/**/*.tsx"
+    #   - "src/**/*.js"
+    #   - "src/**/*.jsx"
+
+jobs:
+  claude-review:
+    # Optional: Filter by PR author
+    # if: |
+    #   github.event.pull_request.user.login == 'external-contributor' ||
+    #   github.event.pull_request.user.login == 'new-developer' ||
+    #   github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'
+
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: read
+      issues: read
+      id-token: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: Run Claude Code Review
+        id: claude-review
+        uses: anthropics/claude-code-action@v1
+        with:
+          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+          prompt: |
+            REPO: ${{ github.repository }}
+            PR NUMBER: ${{ github.event.pull_request.number }}
+
+            Please review this pull request and provide feedback on:
+            - Code quality and best practices
+            - Potential bugs or issues
+            - Performance considerations
+            - Security concerns
+            - Test coverage
+
+            Use the repository's CLAUDE.md for guidance on style and conventions. Be constructive and helpful in your feedback.
+
+            Use `gh pr comment` with your Bash tool to leave your review as a comment on the PR.
+
+          # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
+          # or https://docs.claude.com/en/docs/claude-code/cli-reference for available options
+          claude_args: '--allowed-tools "Bash(gh issue view:*),Bash(gh search:*),Bash(gh issue list:*),Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*),Bash(gh pr list:*)"'
+
diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml
@@ -0,0 +1,50 @@
+name: Claude Code
+
+on:
+  issue_comment:
+    types: [created]
+  pull_request_review_comment:
+    types: [created]
+  issues:
+    types: [opened, assigned]
+  pull_request_review:
+    types: [submitted]
+
+jobs:
+  claude:
+    if: |
+      (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
+      (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
+      (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
+      (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: read
+      issues: read
+      id-token: write
+      actions: read # Required for Claude to read CI results on PRs
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: Run Claude Code
+        id: claude
+        uses: anthropics/claude-code-action@v1
+        with:
+          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+
+          # This is an optional setting that allows Claude to read CI results on PRs
+          additional_permissions: |
+            actions: read
+
+          # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it.
+          # prompt: 'Update the pull request description to include a summary of changes.'
+
+          # Optional: Add claude_args to customize behavior and configuration
+          # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
+          # or https://docs.claude.com/en/docs/claude-code/cli-reference for available options
+          # claude_args: '--allowed-tools Bash(gh pr:*)'
+
diff --git a/.playwright-mcp/new-chat-button-current.png b/.playwright-mcp/new-chat-button-current.png
diff --git a/.playwright-mcp/new-chat-button-final.png b/.playwright-mcp/new-chat-button-final.png
diff --git a/.playwright-mcp/new-chat-button-updated.png b/.playwright-mcp/new-chat-button-updated.png
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,32 @@
+# Pre-commit hooks for code quality
+# Install: pre-commit install
+# Run manually: pre-commit run --all-files
+
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-added-large-files
+      - id: check-json
+      - id: check-toml
+      - id: check-merge-conflict
+      - id: debug-statements
+
+  - repo: https://github.com/psf/black
+    rev: 25.9.0
+    hooks:
+      - id: black
+        language_version: python3.13
+        args: [--config=pyproject.toml]
+
+  - repo: https://github.com/pycqa/isort
+    rev: 6.1.0
+    hooks:
+      - id: isort
+        args: [--settings-path=pyproject.toml]
+
+# Note: Flake8 and MyPy are available via scripts/lint.sh
+# They are not included in pre-commit hooks to allow for incremental code quality improvements
diff --git a/CLAUDE.local.md b/CLAUDE.local.md
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -0,0 +1,200 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Project Overview
+
+A Retrieval-Augmented Generation (RAG) chatbot system for querying course materials. Uses ChromaDB for vector storage, Anthropic's Claude with tool calling for AI generation, and FastAPI for the backend with a vanilla JavaScript frontend.
+
+## Running the Application
+
+**IMPORTANT**: This project uses `uv` for package management. Always use `uv` commands - never use `pip` directly.
+
+### Setup
+```bash
+# Install dependencies
+uv sync
+
+# Create a .env file (copy .env.example to .env), then set this line inside .env (do not run it in the shell):
+ANTHROPIC_API_KEY=your_api_key_here
+```
+
+### Start Server
+```bash
+# Quick start
+./run.sh
+
+# Manual start
+cd backend
+uv run uvicorn app:app --reload --port 8000
+```
+
+Access: http://localhost:8000
+
+### Package Management
+```bash
+# Add new dependency
+uv add <package-name>
+
+# Run any Python command
+uv run <command>
+
+# DO NOT use pip install - always use uv
+```
+
+### Code Quality Tools
+
+This project uses several tools to maintain code quality:
+
+**Formatting:**
+```bash
+# Format code with Black and isort
+./scripts/format.sh
+
+# Or manually
+uv run black backend/ main.py
+uv run isort backend/ main.py
+```
+
+**Linting and Type Checking:**
+```bash
+# Run linters
+./scripts/lint.sh
+
+# Or manually
+uv run flake8 backend/ main.py
+uv run mypy backend/ main.py
+```
+
+**All Quality Checks:**
+```bash
+# Run all checks (formatting, linting, type checking)
+./scripts/quality-check.sh
+```
+
+**Pre-commit Hooks:**
+```bash
+# Install hooks (runs checks automatically before commits)
+uv run pre-commit install
+
+# Run manually on all files
+uv run pre-commit run --all-files
+```
+
+**Configuration Files:**
+- `pyproject.toml`: Black, isort, and mypy settings
+- `.flake8`: Flake8 configuration
+- `.pre-commit-config.yaml`: Pre-commit hooks setup
+
+## Core Architecture
+
+### Request Flow: Frontend → Backend
+1. **Frontend** (`frontend/script.js`): User query → POST `/api/query` with `{query, session_id}`
+2. **API** (`backend/app.py`): FastAPI endpoint → `rag_system.query()`
+3. **RAG System** (`backend/rag_system.py`): Orchestrates query processing
+   - Retrieves conversation history from SessionManager
+   - Calls AIGenerator with tool definitions
+   - Collects sources from ToolManager
+4. **AI Generator** (`backend/ai_generator.py`): Claude API interaction
+   - If Claude decides to use tools → executes via ToolManager
+   - Tool results sent back to Claude for synthesis
+5. **Search Tool** (`backend/search_tools.py`): CourseSearchTool.execute()
+   - Calls VectorStore.search() with optional filters
+6. **Vector Store** (`backend/vector_store.py`): ChromaDB semantic search
+   - Resolves course names via semantic matching in `course_catalog` collection
+   - Searches course content in `course_content` collection
+   - Returns SearchResults with documents + metadata
+
+### Two-Collection Strategy
+- **`course_catalog`**: Course metadata (title, instructor, lessons) for semantic course name matching
+- **`course_content`**: Chunked course content with metadata (course_title, lesson_number, chunk_index)
+
+### Document Processing
+Course documents must follow this format:
+```
+Course Title: [title]
+Course Link: [url]
+Course Instructor: [instructor]
+
+Lesson 0: [lesson title]
+Lesson Link: [lesson url]
+[lesson content]
+
+Lesson 1: [lesson title]
+...
+```
+
+`DocumentProcessor` (`backend/document_processor.py`):
+- Parses metadata from first 3-4 lines
+- Extracts lessons with regex pattern `Lesson \d+:`
+- Chunks text using sentence-based splitting with overlap (configurable in `config.py`)
+- Creates `Course` and `CourseChunk` objects
+
+### Configuration
+All settings in `backend/config.py`:
+- `CHUNK_SIZE`: 800 chars (text chunk size)
+- `CHUNK_OVERLAP`: 100 chars (overlap between chunks)
+- `MAX_RESULTS`: 5 (search results limit)
+- `MAX_HISTORY`: 2 (conversation exchanges to remember; 2 exchanges = 4 messages)
+- `ANTHROPIC_MODEL`: claude-sonnet-4-20250514
+- `EMBEDDING_MODEL`: all-MiniLM-L6-v2
+
+### Session Management
+`SessionManager` (`backend/session_manager.py`):
+- Creates unique session IDs (`session_1`, `session_2`, etc.)
+- Maintains conversation history (max 2 exchanges = 4 messages)
+- Formats history for AI context as: `"User: [msg]\nAssistant: [msg]"`
+
+### Tool-Based Search
+AI can call `search_course_content` tool with:
+- `query` (required): What to search for
+- `course_name` (optional): Course filter (partial matches work via semantic search)
+- `lesson_number` (optional): Specific lesson filter
+
+Tool execution returns formatted results: `[Course Title - Lesson N]\n{content}`
+
+## Key Components
+
+### Backend (`backend/`)
+- `app.py`: FastAPI application, CORS setup, static file serving
+- `rag_system.py`: Main orchestrator coordinating all components
+- `ai_generator.py`: Claude API wrapper with tool execution loop
+- `vector_store.py`: ChromaDB interface with dual collections
+- `search_tools.py`: Tool definitions and execution (CourseSearchTool, ToolManager)
+- `document_processor.py`: Course parsing and text chunking
+- `session_manager.py`: Conversation history tracking
+- `models.py`: Pydantic models (Course, Lesson, CourseChunk)
+- `config.py`: Centralized configuration
+
+### Frontend (`frontend/`)
+- `index.html`: Chat interface
+- `script.js`: API calls, message rendering (uses marked.js for markdown), session management
+- `styles.css`: UI styling
+
+### Data Storage
+- `docs/`: Course documents (auto-loaded on startup)
+- `chroma_db/`: Persistent vector database (created automatically)
+
+## Development Notes
+
+### Adding New Tools
+1. Create tool class inheriting from `Tool` in `search_tools.py`
+2. Implement `get_tool_definition()` and `execute()`
+3. Register with `ToolManager` in `rag_system.py`
+
+### Modifying Search Behavior
+- Course name resolution: `VectorStore._resolve_course_name()` (semantic search in catalog)
+- Filter logic: `VectorStore._build_filter()` (ChromaDB where clauses)
+- Result formatting: `CourseSearchTool._format_results()`
+
+### AI Response Tuning
+- System prompt: `AIGenerator.SYSTEM_PROMPT` (instructions for Claude)
+- Temperature: `AIGenerator.base_params["temperature"]` (currently 0 for deterministic)
+- Max tokens: `AIGenerator.base_params["max_tokens"]` (currently 800)
+
+### Windows Development
+Use Git Bash for running shell scripts (`run.sh`). Commands are POSIX-compatible.
+
+## Data Flow Diagram
+
+See `query-flow-diagram.md` for detailed visual flow from user input to response display.