From 9b5a950cde46aeb1ceb134ff7ea7e65734aef025 Mon Sep 17 00:00:00 2001 From: suhasreddy-northeastern Date: Mon, 20 Jan 2025 21:23:39 -0500 Subject: [PATCH 01/12] Add test_code.py to trigger PR and GitHub Actions --- test_code.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 test_code.py diff --git a/test_code.py b/test_code.py new file mode 100644 index 0000000..96a2613 --- /dev/null +++ b/test_code.py @@ -0,0 +1,13 @@ +def add(a, b): + """ + A simple function to add two numbers. + """ + return a + b + +# Test case to validate the add function +if __name__ == "__main__": + result = add(2, 3) + if result == 5: + print("Test passed!") + else: + print("Test failed!") From b793b251240bda86ee4d5a3c2d58e09648edc67d Mon Sep 17 00:00:00 2001 From: suhasreddy-northeastern Date: Tue, 21 Jan 2025 00:26:15 -0500 Subject: [PATCH 02/12] Test 2 fibo --- test.py | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 test.py diff --git a/test.py b/test.py new file mode 100644 index 0000000..8e63ac1 --- /dev/null +++ b/test.py @@ -0,0 +1,9 @@ +def fibonacci(n): + fib_series = [0, 1] + while len(fib_series) < n: + fib_series.append(fib_series[-1] + fib_series[-2]) + return fib_series + +# Example usage +n = 10 # Number of terms +print(fibonacci(n)) From 22de73bc55e23159d338957d36df14a0c8904cb7 Mon Sep 17 00:00:00 2001 From: suhasreddy-northeastern Date: Tue, 21 Jan 2025 00:48:56 -0500 Subject: [PATCH 03/12] Add ReadMe file --- README.md | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/README.md b/README.md index e69de29..9b9bc22 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,54 @@ +# CodeReviewer.AI + +CodeReviewer.AI is an automated pull request review bot that leverages artificial intelligence to analyze and provide suggestions on code changes. It uses Groq's language model to review and suggest improvements for the code in open pull requests, allowing developers to get feedback on their code changes without manual review. + +## Features +- Automatically fetches open pull requests from a GitHub repository. +- Analyzes code diffs using Groq's `llama-3.3-70b-versatile` model. +- Posts review comments directly to the GitHub pull request with suggestions for improvement. + +## Technologies Used +- **Groq**: We use Groq’s Llama-based model for code review and suggestions. +- **GitHub API**: To interact with the GitHub repository and fetch pull requests. +- **Python**: The main programming language used for developing this bot. +- **GitHub Actions**: For automating the execution of the bot in response to pull requests. + +## Requirements + +You will need the following dependencies: + +- `groq`: For interacting with Groq's API. +- `requests`: For making API requests to GitHub. +- `pygments`: For code syntax highlighting. + +Install the dependencies by running: + +```bash +pip install -r requirements.txtp +``` + +## Setup + +### Groq API Key + +To use the Groq API, you'll need an API key. Set it as an environment variable `GROQ_API_KEY`. If you're using GitHub Actions, you can store it in your repository's secrets. + +### GitHub Token + +A GitHub token is required to authenticate API requests. Set it as an environment variable `GIT_TOKEN`. You can also add it to the repository secrets. + +### Set up GitHub Secrets + +Add the following secrets to your GitHub repository: + +- **GIT_TOKEN**: Your GitHub Personal Access Token (PAT). +- **GROQ_API_KEY**: Your Groq API key. + +### Install Dependencies + +Before running the bot, install the necessary dependencies by running: + +```bash +pip install -r requirements.txt + + From 613a0f2ce1103f17766c4b0ccaa4667b3b90461b Mon Sep 17 00:00:00 2001 From: suhasreddy-northeastern Date: Tue, 21 Jan 2025 00:58:27 -0500 Subject: [PATCH 04/12] Add ReadMe file --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 9b9bc22..22bd8d0 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ +![Logo](logo.png) # CodeReviewer.AI CodeReviewer.AI is an automated pull request review bot that leverages artificial intelligence to analyze and provide suggestions on code changes. It uses Groq's language model to review and suggest improvements for the code in open pull requests, allowing developers to get feedback on their code changes without manual review. From 73f478eacda749be991ddf5c138fe9d8b70c0cf4 Mon Sep 17 00:00:00 2001 From: suhasramanand Date: Wed, 17 Sep 2025 15:52:35 -0400 Subject: [PATCH 05/12] =?UTF-8?q?=F0=9F=9B=A1=EF=B8=8F=20Enhanced=20securi?= =?UTF-8?q?ty=20features:=20CVE=20scanning,=20vulnerability=20detection,?= =?UTF-8?q?=20human-like=20reviews?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Added comprehensive security pattern matching for SQL injection, XSS, path traversal, etc. - Integrated CVE scanning using Safety database for dependency vulnerabilities - Implemented human-like, concise security reviews with actionable feedback - Enhanced prompt engineering for security-focused analysis - Added visual progress indicators and improved error handling - Updated dependencies: safety, bandit, cve-search-api - Updated README with detailed security scanning capabilities --- README.md | 38 ++++++- requirements.txt | 5 +- src/review_bot.py | 259 +++++++++++++++++++++++++++++++++++++++------- 3 files changed, 261 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index 65da1a8..3600cea 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,15 @@ ![Logo](logo.png) # CodeReviewer.AI -CodeReviewer.AI is an automated pull request review bot that leverages artificial intelligence to analyze and provide suggestions on code changes. It uses Groq's language model to review and suggest improvements for the code in open pull requests, allowing developers to get feedback on their code changes without manual review. +CodeReviewer.AI is an **advanced security-focused** automated pull request review bot that leverages artificial intelligence to analyze code changes for vulnerabilities and security issues. It uses Groq's language model combined with pattern-based security scanning to provide comprehensive security reviews. -## Features -- Automatically fetches open pull requests from a GitHub repository. -- Analyzes code diffs using Groq's `llama-3.3-70b-versatile` model. -- Posts review comments directly to the GitHub pull request with suggestions for improvement. +## πŸ›‘οΈ Security Features +- **Automated vulnerability detection** using regex patterns for common security issues +- **CVE scanning** for dependencies using Safety database +- **Human-like, concise security reviews** with actionable feedback +- **Real-time security analysis** of code changes +- **Pattern-based detection** for SQL injection, XSS, path traversal, hardcoded secrets, and more +- **Dependency vulnerability scanning** for known CVEs ## Technologies Used - **Groq**: We use Groq’s Llama-based model for code review and suggestions. @@ -21,6 +24,9 @@ You will need the following dependencies: - `groq`: For interacting with Groq's API. - `requests`: For making API requests to GitHub. - `pygments`: For code syntax highlighting. +- `safety`: For CVE vulnerability scanning of Python dependencies. +- `bandit`: For static security analysis (optional). +- `cve-search-api`: For additional CVE database access. Install the dependencies by running: @@ -51,5 +57,27 @@ Before running the bot, install the necessary dependencies by running: ```bash pip install -r requirements.txt +``` + +## πŸ” Security Scanning Capabilities + +The bot automatically scans for the following security vulnerabilities: + +### Pattern-Based Detection +- **SQL Injection**: Detects unsafe SQL query construction +- **Cross-Site Scripting (XSS)**: Identifies potential XSS vulnerabilities +- **Path Traversal**: Finds directory traversal attack vectors +- **Hardcoded Secrets**: Detects exposed passwords, API keys, and tokens +- **Unsafe Deserialization**: Identifies dangerous deserialization patterns +- **Command Injection**: Detects shell injection vulnerabilities + +### CVE Scanning +- **Dependency Analysis**: Automatically scans `requirements.txt`, `package.json`, and `Pipfile` changes +- **Known Vulnerabilities**: Checks against Safety database for active CVEs +- **Severity Assessment**: Provides severity ratings for identified vulnerabilities +### AI-Powered Reviews +- **Human-like Feedback**: Generates concise, actionable security reviews +- **Contextual Analysis**: Understands code context for better vulnerability assessment +- **Fix Suggestions**: Provides specific recommendations for security improvements diff --git a/requirements.txt b/requirements.txt index 5d1a268..df9b822 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,6 @@ requests pygments -groq \ No newline at end of file +groq +safety +bandit +cve-search-api \ No newline at end of file diff --git a/src/review_bot.py b/src/review_bot.py index 0f6b9b4..3ba837e 100644 --- a/src/review_bot.py +++ b/src/review_bot.py @@ -1,22 +1,67 @@ import os +import re +import json import requests from groq import Groq -groq_api_key = os.getenv("GROQ_API_KEY") -client = Groq(api_key=os.getenv("GROQ_API_KEY")) +from typing import List, Dict, Optional +import subprocess +import tempfile +# Initialize Groq client +client = Groq(api_key=os.getenv("GROQ_API_KEY")) GIT_TOKEN = os.getenv("GIT_TOKEN") GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY") +# Security patterns to detect vulnerabilities +SECURITY_PATTERNS = { + 'sql_injection': [ + r'execute\s*\(\s*["\'].*%s.*["\']', + r'cursor\.execute\s*\(\s*f["\'].*\{.*\}.*["\']', + r'query\s*=\s*["\'].*\+.*["\']' + ], + 'xss': [ + r'innerHTML\s*=', + r'document\.write\s*\(', + r'eval\s*\(', + r'setTimeout\s*\(\s*["\']' + ], + 'path_traversal': [ + r'\.\./', + r'\.\.\\\\', + r'open\s*\(\s*["\'].*\+.*["\']', + r'file\s*=\s*["\'].*\+.*["\']' + ], + 'hardcoded_secrets': [ + r'password\s*=\s*["\'][^"\']+["\']', + r'api_key\s*=\s*["\'][^"\']+["\']', + r'secret\s*=\s*["\'][^"\']+["\']', + r'token\s*=\s*["\'][^"\']+["\']' + ], + 'unsafe_deserialization': [ + r'pickle\.loads\s*\(', + r'yaml\.load\s*\(', + r'json\.loads\s*\(\s*request\.', + r'eval\s*\(' + ], + 'command_injection': [ + r'os\.system\s*\(', + r'subprocess\.call\s*\(', + r'os\.popen\s*\(', + r'shell\s*=\s*True' + ] +} + def get_latest_pr(): """Fetch the latest pull request number from the repository.""" headers = {"Authorization": f"Bearer {GIT_TOKEN}"} url = f"https://api.github.com/repos/suhasramanand/CodeReviewer.AI/pulls?state=open" - print(f"Requesting PRs from URL: {url}") # Add debug log + print(f"πŸ” Checking for open PRs...") response = requests.get(url, headers=headers) response.raise_for_status() prs = response.json() if prs: + print(f"βœ… Found PR #{prs[0]['number']}: {prs[0]['title']}") return prs[0]['number'] else: raise Exception("No open pull requests found.") @@ -29,55 +74,199 @@ def get_diff(pr_number): response.raise_for_status() return response.json() -def review_code(file_diffs): - """Analyze code changes using Groq's LLaMA model.""" - comments = [] - for file in file_diffs: - file_name = file["filename"] - patch = file.get("patch") - if not patch: - continue +def check_cve_vulnerabilities(dependencies: List[str]) -> List[Dict]: + """Check for known CVEs in dependencies using safety.""" + vulnerabilities = [] + try: + # Create a temporary requirements file + with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f: + f.write('\n'.join(dependencies)) + temp_file = f.name + + # Run safety check + result = subprocess.run(['safety', 'check', '-r', temp_file, '--json'], + capture_output=True, text=True, timeout=30) + + if result.returncode != 0 and result.stdout: + try: + safety_data = json.loads(result.stdout) + for vuln in safety_data: + vulnerabilities.append({ + 'package': vuln.get('package_name', 'Unknown'), + 'version': vuln.get('analyzed_version', 'Unknown'), + 'cve': vuln.get('advisory', 'No CVE ID'), + 'severity': vuln.get('severity', 'Unknown'), + 'description': vuln.get('description', 'No description available') + }) + except json.JSONDecodeError: + pass + + # Clean up temp file + os.unlink(temp_file) + + except Exception as e: + print(f"⚠️ CVE check failed: {e}") + + return vulnerabilities - prompt = ( - f"Review the following code changes in the file '{file_name}':\n\n" - f"{patch}\n\n" - f"### Perform the following tasks:\n" - f"1. Analyze the **time complexity** and **space complexity** of the functions or logic in the code.\n" - f"2. Identify any **potential vulnerabilities**, such as:\n" - f" - Unvalidated input\n" - f" - API abuse risks\n" - f" - Hardcoded sensitive information\n" - f" - Improper error handling\n" - f"3. Suggest improvements to **optimize performance** and **enhance security**.\n" - f"4. Provide general feedback on code quality, readability, and maintainability." - ) +def scan_for_security_vulnerabilities(code_content: str, file_name: str) -> List[Dict]: + """Scan code for security vulnerabilities using pattern matching.""" + vulnerabilities = [] + + for vuln_type, patterns in SECURITY_PATTERNS.items(): + for pattern in patterns: + matches = re.finditer(pattern, code_content, re.IGNORECASE | re.MULTILINE) + for match in matches: + line_num = code_content[:match.start()].count('\n') + 1 + vulnerabilities.append({ + 'type': vuln_type, + 'line': line_num, + 'code': match.group(0).strip(), + 'severity': 'HIGH' if vuln_type in ['sql_injection', 'command_injection', 'unsafe_deserialization'] else 'MEDIUM', + 'file': file_name + }) + + return vulnerabilities + +def extract_dependencies_from_diff(patch: str) -> List[str]: + """Extract dependencies from requirements.txt changes.""" + dependencies = [] + lines = patch.split('\n') + for line in lines: + if line.startswith('+') and not line.startswith('+++'): + dep_line = line[1:].strip() + if dep_line and not dep_line.startswith('#'): + dependencies.append(dep_line) + return dependencies + +def generate_human_review(file_name: str, patch: str, vulnerabilities: List[Dict], cve_vulns: List[Dict]) -> str: + """Generate human-like, concise security review using AI.""" + + # Count vulnerabilities by severity + high_vulns = [v for v in vulnerabilities if v['severity'] == 'HIGH'] + medium_vulns = [v for v in vulnerabilities if v['severity'] == 'MEDIUM'] + + # Create vulnerability summary + vuln_summary = "" + if high_vulns: + vuln_summary += f"🚨 **{len(high_vulns)} HIGH severity issues found**\n" + if medium_vulns: + vuln_summary += f"⚠️ **{len(medium_vulns)} MEDIUM severity issues found**\n" + if cve_vulns: + vuln_summary += f"πŸ” **{len(cve_vulns)} known CVEs in dependencies**\n" + + if not vulnerabilities and not cve_vulns: + vuln_summary = "βœ… **No obvious security issues detected**\n" + + # Create detailed vulnerability list + vuln_details = "" + for vuln in vulnerabilities[:5]: # Limit to top 5 for conciseness + vuln_details += f"β€’ **Line {vuln['line']}**: {vuln['type'].replace('_', ' ').title()} - `{vuln['code'][:50]}...`\n" + + for cve in cve_vulns[:3]: # Limit to top 3 CVEs + vuln_details += f"β€’ **{cve['package']} {cve['version']}**: {cve['cve']} ({cve['severity']})\n" + + prompt = f"""You are a senior security engineer reviewing code changes. Be concise, human-like, and focus on security. + +File: {file_name} +Code changes: +{patch[:2000]}... + +Security scan results: +{vuln_summary} +{vuln_details} +Provide a brief, actionable security review (max 3-4 sentences). Focus on: +1. Critical security issues that need immediate attention +2. Specific fixes for vulnerabilities found +3. Best practices to implement +Be conversational but professional. Use emojis sparingly. Prioritize actionable feedback over general advice.""" + + try: chat_completion = client.chat.completions.create( messages=[ - {"role": "system", "content": "You are a professional code reviewer with expertise in performance optimization and secure coding practices."}, + {"role": "system", "content": "You are a senior security engineer. Provide concise, actionable security feedback. Be human-like and direct."}, {"role": "user", "content": prompt} ], - model="llama-3.3-70b-versatile" + model="llama-3.3-70b-versatile", + max_tokens=300, + temperature=0.3 ) + + return chat_completion.choices[0].message.content.strip() + except Exception as e: + return f"πŸ”§ Security scan completed. {vuln_summary}{vuln_details}" - comments.append(f"**{file_name}:**\n{chat_completion.choices[0].message.content}") +def review_code(file_diffs): + """Analyze code changes with enhanced security focus.""" + comments = [] + + for file in file_diffs: + file_name = file["filename"] + patch = file.get("patch", "") + + if not patch: + continue + + print(f"πŸ” Analyzing {file_name} for security issues...") + + # Extract added code for analysis + added_lines = [] + for line in patch.split('\n'): + if line.startswith('+') and not line.startswith('+++'): + added_lines.append(line[1:]) + + added_code = '\n'.join(added_lines) + + # Security vulnerability scanning + vulnerabilities = scan_for_security_vulnerabilities(added_code, file_name) + + # CVE checking for dependencies + cve_vulnerabilities = [] + if 'requirements.txt' in file_name or 'package.json' in file_name or 'Pipfile' in file_name: + dependencies = extract_dependencies_from_diff(patch) + if dependencies: + cve_vulnerabilities = check_cve_vulnerabilities(dependencies) + + # Generate human-like review + review = generate_human_review(file_name, patch, vulnerabilities, cve_vulnerabilities) + + # Format comment with security badge + security_status = "πŸ›‘οΈ SECURE" if not vulnerabilities and not cve_vulnerabilities else "⚠️ SECURITY ISSUES" + comment = f"## {security_status} - {file_name}\n\n{review}" + + comments.append(comment) + return comments def post_review(pr_number, comments): - """Post comments back to the pull request.""" + """Post security-focused comments back to the pull request.""" headers = {"Authorization": f"Bearer {GIT_TOKEN}"} url = f"https://api.github.com/repos/suhasramanand/CodeReviewer.AI/issues/{pr_number}/comments" + for comment in comments: payload = {"body": comment} response = requests.post(url, headers=headers, json=payload) response.raise_for_status() + print(f"βœ… Posted security review comment") if __name__ == "__main__": - pr_number = get_latest_pr() - - diffs = get_diff(pr_number) - - review_comments = review_code(diffs) - - post_review(pr_number, review_comments) + try: + print("πŸ›‘οΈ Starting Security Code Review Bot...") + pr_number = get_latest_pr() + + diffs = get_diff(pr_number) + print(f"πŸ“ Analyzing {len(diffs)} files...") + + review_comments = review_code(diffs) + + if review_comments: + post_review(pr_number, review_comments) + print(f"πŸŽ‰ Security review completed for PR #{pr_number}") + else: + print("ℹ️ No files to review") + + except Exception as e: + print(f"❌ Error: {e}") + raise From 8a26b5f5a582eefdf01988e14a4197edb04eac9e Mon Sep 17 00:00:00 2001 From: suhasramanand Date: Wed, 17 Sep 2025 15:54:14 -0400 Subject: [PATCH 06/12] =?UTF-8?q?=F0=9F=94=A7=20Fix=20dependency=20issue:?= =?UTF-8?q?=20Remove=20non-existent=20cve-search-api=20package?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Removed cve-search-api from requirements.txt (package doesn't exist) - Updated README.md to reflect correct dependencies - Safety package provides sufficient CVE scanning capabilities --- README.md | 1 - requirements.txt | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index 3600cea..b6abd17 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,6 @@ You will need the following dependencies: - `pygments`: For code syntax highlighting. - `safety`: For CVE vulnerability scanning of Python dependencies. - `bandit`: For static security analysis (optional). -- `cve-search-api`: For additional CVE database access. Install the dependencies by running: diff --git a/requirements.txt b/requirements.txt index df9b822..831ea93 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,5 +2,4 @@ requests pygments groq safety -bandit -cve-search-api \ No newline at end of file +bandit \ No newline at end of file From e4713105b40974dc6c2cc8cf4264e70e47957302 Mon Sep 17 00:00:00 2001 From: suhasramanand Date: Wed, 17 Sep 2025 15:56:23 -0400 Subject: [PATCH 07/12] =?UTF-8?q?=F0=9F=94=A7=20Fix=20GitHub=20API=20authe?= =?UTF-8?q?ntication=20issue?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use GITHUB_REPOSITORY environment variable instead of hardcoded repo name - Updated get_latest_pr(), get_diff(), and post_review() functions - Added GITHUB_REPOSITORY to GitHub Actions workflow environment - This should resolve the 401 Unauthorized error --- .github/workflows/code-review.yml | 1 + src/review_bot.py | 14 ++++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml index f4e6fba..36d1127 100644 --- a/.github/workflows/code-review.yml +++ b/.github/workflows/code-review.yml @@ -25,3 +25,4 @@ jobs: env: GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} GIT_TOKEN: ${{ secrets.GIT_TOKEN }} + GITHUB_REPOSITORY: ${{ github.repository }} diff --git a/src/review_bot.py b/src/review_bot.py index 3ba837e..e31c2e0 100644 --- a/src/review_bot.py +++ b/src/review_bot.py @@ -54,8 +54,10 @@ def get_latest_pr(): """Fetch the latest pull request number from the repository.""" headers = {"Authorization": f"Bearer {GIT_TOKEN}"} - url = f"https://api.github.com/repos/suhasramanand/CodeReviewer.AI/pulls?state=open" - print(f"πŸ” Checking for open PRs...") + # Use GITHUB_REPOSITORY environment variable if available, otherwise fallback to hardcoded value + repo = GITHUB_REPOSITORY or "suhasramanand/CodeReviewer.AI" + url = f"https://api.github.com/repos/{repo}/pulls?state=open" + print(f"πŸ” Checking for open PRs in {repo}...") response = requests.get(url, headers=headers) response.raise_for_status() @@ -69,7 +71,9 @@ def get_latest_pr(): def get_diff(pr_number): """Fetch the pull request diff.""" headers = {"Authorization": f"Bearer {GIT_TOKEN}"} - url = f"https://api.github.com/repos/suhasramanand/CodeReviewer.AI/pulls/{pr_number}/files" + # Use GITHUB_REPOSITORY environment variable if available, otherwise fallback to hardcoded value + repo = GITHUB_REPOSITORY or "suhasramanand/CodeReviewer.AI" + url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/files" response = requests.get(url, headers=headers) response.raise_for_status() return response.json() @@ -243,7 +247,9 @@ def review_code(file_diffs): def post_review(pr_number, comments): """Post security-focused comments back to the pull request.""" headers = {"Authorization": f"Bearer {GIT_TOKEN}"} - url = f"https://api.github.com/repos/suhasramanand/CodeReviewer.AI/issues/{pr_number}/comments" + # Use GITHUB_REPOSITORY environment variable if available, otherwise fallback to hardcoded value + repo = GITHUB_REPOSITORY or "suhasramanand/CodeReviewer.AI" + url = f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments" for comment in comments: payload = {"body": comment} From dc489af430fcb2504f01d0717a1930fd75d545e9 Mon Sep 17 00:00:00 2001 From: suhasramanand Date: Wed, 17 Sep 2025 15:57:47 -0400 Subject: [PATCH 08/12] =?UTF-8?q?=F0=9F=94=A7=20Use=20GitHub=20Actions=20c?= =?UTF-8?q?ontext=20to=20avoid=20API=20authentication=20issues?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use GITHUB_EVENT_NUMBER from GitHub Actions context instead of API calls - Added better error handling and debugging for authentication issues - Enhanced headers with proper Accept and User-Agent - This should bypass the 401 authentication error by using GitHub's context --- .github/workflows/code-review.yml | 1 + src/review_bot.py | 26 ++++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml index 36d1127..ef7b265 100644 --- a/.github/workflows/code-review.yml +++ b/.github/workflows/code-review.yml @@ -26,3 +26,4 @@ jobs: GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} GIT_TOKEN: ${{ secrets.GIT_TOKEN }} GITHUB_REPOSITORY: ${{ github.repository }} + GITHUB_EVENT_NUMBER: ${{ github.event.number }} diff --git a/src/review_bot.py b/src/review_bot.py index e31c2e0..4e72a06 100644 --- a/src/review_bot.py +++ b/src/review_bot.py @@ -53,12 +53,27 @@ def get_latest_pr(): """Fetch the latest pull request number from the repository.""" - headers = {"Authorization": f"Bearer {GIT_TOKEN}"} + headers = { + "Authorization": f"Bearer {GIT_TOKEN}", + "Accept": "application/vnd.github.v3+json", + "User-Agent": "CodeReviewer.AI-Bot" + } # Use GITHUB_REPOSITORY environment variable if available, otherwise fallback to hardcoded value repo = GITHUB_REPOSITORY or "suhasramanand/CodeReviewer.AI" url = f"https://api.github.com/repos/{repo}/pulls?state=open" print(f"πŸ” Checking for open PRs in {repo}...") + print(f"πŸ”‘ Using token: {GIT_TOKEN[:10]}..." if GIT_TOKEN else "❌ No token provided") + response = requests.get(url, headers=headers) + print(f"πŸ“‘ Response status: {response.status_code}") + + if response.status_code == 401: + print("❌ Authentication failed. Please check:") + print(" 1. GIT_TOKEN secret is set correctly") + print(" 2. Token has 'repo' permissions") + print(" 3. Token is not expired") + response.raise_for_status() + response.raise_for_status() prs = response.json() @@ -260,7 +275,14 @@ def post_review(pr_number, comments): if __name__ == "__main__": try: print("πŸ›‘οΈ Starting Security Code Review Bot...") - pr_number = get_latest_pr() + + # Try to get PR number from GitHub Actions context first + pr_number = os.getenv("GITHUB_EVENT_NUMBER") or os.getenv("GITHUB_PR_NUMBER") + if not pr_number: + print("πŸ” No GitHub context found, trying to fetch latest PR...") + pr_number = get_latest_pr() + else: + print(f"πŸ“‹ Using PR number from GitHub Actions context: {pr_number}") diffs = get_diff(pr_number) print(f"πŸ“ Analyzing {len(diffs)} files...") From 5426ad17151dd0d499fa1bc543b0ae57caf99eab Mon Sep 17 00:00:00 2001 From: suhasramanand Date: Wed, 17 Sep 2025 15:59:17 -0400 Subject: [PATCH 09/12] =?UTF-8?q?=F0=9F=94=A7=20Fix=20get=5Fdiff=20functio?= =?UTF-8?q?n=20headers=20to=20match=20get=5Flatest=5Fpr?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Updated get_diff() to use same enhanced headers as get_latest_pr() - Added proper Accept and User-Agent headers - Added debugging for authentication issues - This should resolve the 401 error when fetching PR diff files --- src/review_bot.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/review_bot.py b/src/review_bot.py index 4e72a06..a95928a 100644 --- a/src/review_bot.py +++ b/src/review_bot.py @@ -85,11 +85,26 @@ def get_latest_pr(): def get_diff(pr_number): """Fetch the pull request diff.""" - headers = {"Authorization": f"Bearer {GIT_TOKEN}"} + headers = { + "Authorization": f"Bearer {GIT_TOKEN}", + "Accept": "application/vnd.github.v3+json", + "User-Agent": "CodeReviewer.AI-Bot" + } # Use GITHUB_REPOSITORY environment variable if available, otherwise fallback to hardcoded value repo = GITHUB_REPOSITORY or "suhasramanand/CodeReviewer.AI" url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/files" + print(f"πŸ“ Fetching diff for PR #{pr_number}...") + response = requests.get(url, headers=headers) + print(f"πŸ“‘ Response status: {response.status_code}") + + if response.status_code == 401: + print("❌ Authentication failed when fetching diff. Please check:") + print(" 1. GIT_TOKEN secret is set correctly") + print(" 2. Token has 'repo' permissions") + print(" 3. Token is not expired") + response.raise_for_status() + response.raise_for_status() return response.json() From 4d74de0aeee4561c583586da0085b6fbcb75fe7e Mon Sep 17 00:00:00 2001 From: suhasramanand Date: Wed, 17 Sep 2025 16:01:54 -0400 Subject: [PATCH 10/12] =?UTF-8?q?=F0=9F=94=A7=20Add=20git-based=20diff=20f?= =?UTF-8?q?allback=20to=20avoid=20API=20authentication=20issues?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Added get_diff_from_git() function that uses git command instead of GitHub API - Falls back to GitHub API only if git command fails - This should work even with authentication issues since git diff doesn't need API access - Parses git diff output into GitHub API-compatible format --- src/review_bot.py | 59 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/src/review_bot.py b/src/review_bot.py index a95928a..7c869b6 100644 --- a/src/review_bot.py +++ b/src/review_bot.py @@ -83,8 +83,8 @@ def get_latest_pr(): else: raise Exception("No open pull requests found.") -def get_diff(pr_number): - """Fetch the pull request diff.""" +def get_diff_from_github_api(pr_number): + """Fetch the pull request diff using GitHub API.""" headers = { "Authorization": f"Bearer {GIT_TOKEN}", "Accept": "application/vnd.github.v3+json", @@ -108,6 +108,61 @@ def get_diff(pr_number): response.raise_for_status() return response.json() +def get_diff_from_git(): + """Get diff using git command instead of GitHub API.""" + try: + print("πŸ“ Getting diff using git command...") + # Get the diff between the current branch and the base branch + result = subprocess.run(['git', 'diff', 'origin/main', 'HEAD'], + capture_output=True, text=True, timeout=30) + + if result.returncode == 0: + diff_content = result.stdout + print(f"βœ… Got diff content ({len(diff_content)} characters)") + + # Parse the diff into a format similar to GitHub API response + files = [] + current_file = None + + for line in diff_content.split('\n'): + if line.startswith('diff --git'): + if current_file: + files.append(current_file) + # Extract filename from diff header + parts = line.split() + if len(parts) >= 4: + filename = parts[3][2:] # Remove 'b/' prefix + current_file = { + "filename": filename, + "patch": "" + } + elif current_file and line.startswith(('+', '-', ' ')): + current_file["patch"] += line + "\n" + + if current_file: + files.append(current_file) + + print(f"πŸ“‹ Parsed {len(files)} files from git diff") + return files + else: + print(f"❌ Git diff failed: {result.stderr}") + return [] + + except Exception as e: + print(f"❌ Error getting git diff: {e}") + return [] + +def get_diff(pr_number): + """Get diff using git command first, fallback to GitHub API.""" + # Try git command first (no authentication needed) + files = get_diff_from_git() + + if files: + return files + + print("πŸ”„ Git diff failed, trying GitHub API...") + return get_diff_from_github_api(pr_number) + def check_cve_vulnerabilities(dependencies: List[str]) -> List[Dict]: """Check for known CVEs in dependencies using safety.""" vulnerabilities = [] From 06be5f35567167011e7d9650c02967c883d7c392 Mon Sep 17 00:00:00 2001 From: suhasramanand Date: Wed, 17 Sep 2025 16:06:06 -0400 Subject: [PATCH 11/12] =?UTF-8?q?=F0=9F=9A=80=20Transform=20into=20compreh?= =?UTF-8?q?ensive=20Senior=20Engineer=20Code=20Review=20Bot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Expanded from security-only to full engineering review (security, quality, performance, best practices) - Added comprehensive pattern detection for: * Security vulnerabilities (SQL injection, XSS, secrets, etc.) * Code quality issues (long functions, magic numbers, TODOs, etc.) * Performance problems (N+1 queries, inefficient loops, memory leaks) * Best practices (error handling, validation, hardcoded values) - Made reviews EXTREMELY concise (2-3 words when good, 1-2 lines max for issues) - Updated status badges: βœ… GOOD, 🚨 CRITICAL, ⚠️ ISSUES, πŸ’‘ SUGGESTIONS - Reduced token limit to 80 for ultra-brief responses - Now acts like a real senior engineer doing PR reviews --- src/review_bot.py | 288 ++++++++++++++++++++++++++++++---------------- 1 file changed, 191 insertions(+), 97 deletions(-) diff --git a/src/review_bot.py b/src/review_bot.py index 7c869b6..65d34f5 100644 --- a/src/review_bot.py +++ b/src/review_bot.py @@ -12,43 +12,99 @@ GIT_TOKEN = os.getenv("GIT_TOKEN") GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY") -# Security patterns to detect vulnerabilities -SECURITY_PATTERNS = { - 'sql_injection': [ - r'execute\s*\(\s*["\'].*%s.*["\']', - r'cursor\.execute\s*\(\s*f["\'].*\{.*\}.*["\']', - r'query\s*=\s*["\'].*\+.*["\']' - ], - 'xss': [ - r'innerHTML\s*=', - r'document\.write\s*\(', - r'eval\s*\(', - r'setTimeout\s*\(\s*["\']' - ], - 'path_traversal': [ - r'\.\./', - r'\.\.\\\\', - r'open\s*\(\s*["\'].*\+.*["\']', - r'file\s*=\s*["\'].*\+.*["\']' - ], - 'hardcoded_secrets': [ - r'password\s*=\s*["\'][^"\']+["\']', - r'api_key\s*=\s*["\'][^"\']+["\']', - r'secret\s*=\s*["\'][^"\']+["\']', - r'token\s*=\s*["\'][^"\']+["\']' - ], - 'unsafe_deserialization': [ - r'pickle\.loads\s*\(', - r'yaml\.load\s*\(', - r'json\.loads\s*\(\s*request\.', - r'eval\s*\(' - ], - 'command_injection': [ - r'os\.system\s*\(', - r'subprocess\.call\s*\(', - r'os\.popen\s*\(', - r'shell\s*=\s*True' - ] +# Code quality and security patterns to detect issues +CODE_PATTERNS = { + 'security': { + 'sql_injection': [ + r'execute\s*\(\s*["\'].*%s.*["\']', + r'cursor\.execute\s*\(\s*f["\'].*\{.*\}.*["\']', + r'query\s*=\s*["\'].*\+.*["\']' + ], + 'xss': [ + r'innerHTML\s*=', + r'document\.write\s*\(', + r'eval\s*\(', + r'setTimeout\s*\(\s*["\']' + ], + 'path_traversal': [ + r'\.\./', + r'\.\.\\\\', + r'open\s*\(\s*["\'].*\+.*["\']', + r'file\s*=\s*["\'].*\+.*["\']' + ], + 'hardcoded_secrets': [ + r'password\s*=\s*["\'][^"\']+["\']', + r'api_key\s*=\s*["\'][^"\']+["\']', + r'secret\s*=\s*["\'][^"\']+["\']', + r'token\s*=\s*["\'][^"\']+["\']' + ], + 'unsafe_deserialization': [ + r'pickle\.loads\s*\(', + r'yaml\.load\s*\(', + r'json\.loads\s*\(\s*request\.', + r'eval\s*\(' + ], + 'command_injection': [ + r'os\.system\s*\(', + r'subprocess\.call\s*\(', + r'os\.popen\s*\(', + r'shell\s*=\s*True' + ] + }, + 'code_quality': { + 'long_functions': [ + r'def\s+\w+\([^)]*\):\s*$' + ], + 'magic_numbers': [ + r'\b\d{3,}\b' + ], + 'todo_comments': [ + r'#\s*(TODO|FIXME|HACK|XXX)', + r'//\s*(TODO|FIXME|HACK|XXX)' + ], + 'print_statements': [ + r'print\s*\(', + r'console\.log\s*\(' + ], + 'empty_catches': [ + r'except\s*:.*pass', + r'catch\s*\([^)]*\)\s*\{\s*\}' + ], + 'duplicate_code': [ + r'copy.*paste', + r'duplicate' + ] + }, + 'performance': { + 'n_plus_one': [ + r'for\s+\w+\s+in\s+\w+:\s*\n.*\.query\(', + r'for\s+\w+\s+in\s+\w+:\s*\n.*\.get\(' + ], + 'inefficient_loops': [ + r'for\s+\w+\s+in\s+range\(len\(', + r'\.append\(.*\)\s*in\s+loop' + ], + 'memory_leaks': [ + r'global\s+\w+', + r'static\s+\w+' + ] + }, + 'best_practices': { + 'missing_error_handling': [ + r'def\s+\w+\([^)]*\):\s*\n(?!.*try)', + r'function\s+\w+\([^)]*\)\s*\{\s*(?!.*try)' + ], + 'hardcoded_values': [ + r'localhost', + r'127\.0\.0\.1', + r'http://', + r'https://' + ], + 'missing_validation': [ + r'def\s+\w+\([^)]*\):\s*\n(?!.*if.*is.*None)', + r'function\s+\w+\([^)]*\)\s*\{\s*(?!.*if.*===.*null)' + ] + } } def get_latest_pr(): @@ -198,24 +254,37 @@ def check_cve_vulnerabilities(dependencies: List[str]) -> List[Dict]: return vulnerabilities -def scan_for_security_vulnerabilities(code_content: str, file_name: str) -> List[Dict]: - """Scan code for security vulnerabilities using pattern matching.""" - vulnerabilities = [] +def scan_for_code_issues(code_content: str, file_name: str) -> List[Dict]: + """Scan code for security, quality, performance, and best practice issues.""" + issues = [] - for vuln_type, patterns in SECURITY_PATTERNS.items(): - for pattern in patterns: - matches = re.finditer(pattern, code_content, re.IGNORECASE | re.MULTILINE) - for match in matches: - line_num = code_content[:match.start()].count('\n') + 1 - vulnerabilities.append({ - 'type': vuln_type, - 'line': line_num, - 'code': match.group(0).strip(), - 'severity': 'HIGH' if vuln_type in ['sql_injection', 'command_injection', 'unsafe_deserialization'] else 'MEDIUM', - 'file': file_name - }) + for category, patterns in CODE_PATTERNS.items(): + for issue_type, pattern_list in patterns.items(): + for pattern in pattern_list: + matches = re.finditer(pattern, code_content, re.IGNORECASE | re.MULTILINE) + for match in matches: + line_num = code_content[:match.start()].count('\n') + 1 + + # Determine severity based on category and type + if category == 'security': + severity = 'HIGH' if issue_type in ['sql_injection', 'command_injection', 'unsafe_deserialization'] else 'MEDIUM' + elif category == 'performance': + severity = 'MEDIUM' + elif category == 'code_quality': + severity = 'LOW' if issue_type in ['todo_comments', 'print_statements'] else 'MEDIUM' + else: # best_practices + severity = 'LOW' + + issues.append({ + 'category': category, + 'type': issue_type, + 'line': line_num, + 'code': match.group(0).strip(), + 'severity': severity, + 'file': file_name + }) - return vulnerabilities + return issues def extract_dependencies_from_diff(patch: str) -> List[str]: """Extract dependencies from requirements.txt changes.""" @@ -228,67 +297,84 @@ def extract_dependencies_from_diff(patch: str) -> List[str]: dependencies.append(dep_line) return dependencies -def generate_human_review(file_name: str, patch: str, vulnerabilities: List[Dict], cve_vulns: List[Dict]) -> str: - """Generate human-like, concise security review using AI.""" +def generate_human_review(file_name: str, patch: str, issues: List[Dict], cve_vulns: List[Dict]) -> str: + """Generate human-like, concise code review using AI.""" - # Count vulnerabilities by severity - high_vulns = [v for v in vulnerabilities if v['severity'] == 'HIGH'] - medium_vulns = [v for v in vulnerabilities if v['severity'] == 'MEDIUM'] + # Count issues by severity and category + high_issues = [i for i in issues if i['severity'] == 'HIGH'] + medium_issues = [i for i in issues if i['severity'] == 'MEDIUM'] + low_issues = [i for i in issues if i['severity'] == 'LOW'] - # Create vulnerability summary - vuln_summary = "" - if high_vulns: - vuln_summary += f"🚨 **{len(high_vulns)} HIGH severity issues found**\n" - if medium_vulns: - vuln_summary += f"⚠️ **{len(medium_vulns)} MEDIUM severity issues found**\n" + # Group by category + security_issues = [i for i in issues if i['category'] == 'security'] + quality_issues = [i for i in issues if i['category'] == 'code_quality'] + performance_issues = [i for i in issues if i['category'] == 'performance'] + best_practice_issues = [i for i in issues if i['category'] == 'best_practices'] + + # Create issue summary + issue_summary = "" + if high_issues: + issue_summary += f"🚨 **{len(high_issues)} critical issues**\n" + if medium_issues: + issue_summary += f"⚠️ **{len(medium_issues)} issues**\n" + if low_issues: + issue_summary += f"πŸ’‘ **{len(low_issues)} suggestions**\n" if cve_vulns: - vuln_summary += f"πŸ” **{len(cve_vulns)} known CVEs in dependencies**\n" + issue_summary += f"πŸ” **{len(cve_vulns)} CVEs in dependencies**\n" - if not vulnerabilities and not cve_vulns: - vuln_summary = "βœ… **No obvious security issues detected**\n" + if not issues and not cve_vulns: + issue_summary = "βœ… **All good**\n" - # Create detailed vulnerability list - vuln_details = "" - for vuln in vulnerabilities[:5]: # Limit to top 5 for conciseness - vuln_details += f"β€’ **Line {vuln['line']}**: {vuln['type'].replace('_', ' ').title()} - `{vuln['code'][:50]}...`\n" + # Create detailed issue list (top 3 most critical) + issue_details = "" + for issue in sorted(issues, key=lambda x: ['HIGH', 'MEDIUM', 'LOW'].index(x['severity']))[:3]: + category_emoji = {'security': 'πŸ›‘οΈ', 'code_quality': 'πŸ“', 'performance': '⚑', 'best_practices': '✨'} + emoji = category_emoji.get(issue['category'], 'πŸ“‹') + issue_details += f"β€’ {emoji} **Line {issue['line']}**: {issue['type'].replace('_', ' ').title()}\n" - for cve in cve_vulns[:3]: # Limit to top 3 CVEs - vuln_details += f"β€’ **{cve['package']} {cve['version']}**: {cve['cve']} ({cve['severity']})\n" + for cve in cve_vulns[:2]: # Limit to top 2 CVEs + issue_details += f"β€’ πŸ” **{cve['package']}**: {cve['cve']}\n" - prompt = f"""You are a senior security engineer reviewing code changes. Be concise, human-like, and focus on security. + prompt = f"""You are a senior engineer reviewing PRs. Be EXTREMELY concise and human-like. File: {file_name} Code changes: -{patch[:2000]}... +{patch[:800]}... -Security scan results: -{vuln_summary} -{vuln_details} +Review results: +{issue_summary} +{issue_details} -Provide a brief, actionable security review (max 3-4 sentences). Focus on: -1. Critical security issues that need immediate attention -2. Specific fixes for vulnerabilities found -3. Best practices to implement +Provide a VERY brief review: +- If no issues: Just say "Looks good" or "All clear" (2-3 words max) +- If issues: Mention only the most critical issue in 1-2 lines max +- Be conversational, not formal +- Focus on what matters most -Be conversational but professional. Use emojis sparingly. Prioritize actionable feedback over general advice.""" +Examples: +- "Looks good πŸ‘" +- "All clear" +- "SQL injection on line 15 - use params" +- "Missing error handling" +- "Performance issue - N+1 query" """ try: chat_completion = client.chat.completions.create( messages=[ - {"role": "system", "content": "You are a senior security engineer. Provide concise, actionable security feedback. Be human-like and direct."}, + {"role": "system", "content": "You are a senior engineer. Be EXTREMELY brief. If no issues, just say 'Looks good'. If issues, mention only the most critical problem in 1-2 lines."}, {"role": "user", "content": prompt} ], model="llama-3.3-70b-versatile", - max_tokens=300, + max_tokens=80, temperature=0.3 ) return chat_completion.choices[0].message.content.strip() except Exception as e: - return f"πŸ”§ Security scan completed. {vuln_summary}{vuln_details}" + return f"πŸ”§ Review completed. {issue_summary}{issue_details}" def review_code(file_diffs): - """Analyze code changes with enhanced security focus.""" + """Analyze code changes with comprehensive engineering review.""" comments = [] for file in file_diffs: @@ -298,7 +384,7 @@ def review_code(file_diffs): if not patch: continue - print(f"πŸ” Analyzing {file_name} for security issues...") + print(f"πŸ” Reviewing {file_name}...") # Extract added code for analysis added_lines = [] @@ -308,8 +394,8 @@ def review_code(file_diffs): added_code = '\n'.join(added_lines) - # Security vulnerability scanning - vulnerabilities = scan_for_security_vulnerabilities(added_code, file_name) + # Comprehensive code analysis + issues = scan_for_code_issues(added_code, file_name) # CVE checking for dependencies cve_vulnerabilities = [] @@ -319,11 +405,19 @@ def review_code(file_diffs): cve_vulnerabilities = check_cve_vulnerabilities(dependencies) # Generate human-like review - review = generate_human_review(file_name, patch, vulnerabilities, cve_vulnerabilities) + review = generate_human_review(file_name, patch, issues, cve_vulnerabilities) - # Format comment with security badge - security_status = "πŸ›‘οΈ SECURE" if not vulnerabilities and not cve_vulnerabilities else "⚠️ SECURITY ISSUES" - comment = f"## {security_status} - {file_name}\n\n{review}" + # Format comment with status badge + if not issues and not cve_vulnerabilities: + status = "βœ… GOOD" + elif any(i['severity'] == 'HIGH' for i in issues): + status = "🚨 CRITICAL" + elif any(i['severity'] == 'MEDIUM' for i in issues): + status = "⚠️ ISSUES" + else: + status = "πŸ’‘ SUGGESTIONS" + + comment = f"## {status} - {file_name}\n\n{review}" comments.append(comment) @@ -344,7 +438,7 @@ def post_review(pr_number, comments): if __name__ == "__main__": try: - print("πŸ›‘οΈ Starting Security Code Review Bot...") + print("πŸ‘¨β€πŸ’» Starting Senior Engineer Code Review Bot...") # Try to get PR number from GitHub Actions context first pr_number = os.getenv("GITHUB_EVENT_NUMBER") or os.getenv("GITHUB_PR_NUMBER") @@ -355,13 +449,13 @@ def post_review(pr_number, comments): print(f"πŸ“‹ Using PR number from GitHub Actions context: {pr_number}") diffs = get_diff(pr_number) - print(f"πŸ“ Analyzing {len(diffs)} files...") + print(f"πŸ“ Reviewing {len(diffs)} files...") review_comments = review_code(diffs) if review_comments: post_review(pr_number, review_comments) - print(f"πŸŽ‰ Security review completed for PR #{pr_number}") + print(f"πŸŽ‰ Code review completed for PR #{pr_number}") else: print("ℹ️ No files to review") From 0dc8a5e521fd3c5921d5be66794f00118d5090e3 Mon Sep 17 00:00:00 2001 From: suhasramanand Date: Wed, 17 Sep 2025 16:06:31 -0400 Subject: [PATCH 12/12] =?UTF-8?q?=F0=9F=93=8B=20Add=20comprehensive=20chec?= =?UTF-8?q?klist=20format=20to=20reviews?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Added structured checklist with βœ…/❌/⚠️/πŸ’‘ status indicators - Covers all review categories: Security, Code Quality, Performance, Best Practices, Dependencies - Clear overall status: 'All checks passed! πŸŽ‰' or specific issue counts - Shows critical issues with line numbers when found - Updated status badges: 'ALL CHECKS PASSED', 'CRITICAL ISSUES', 'ISSUES FOUND', 'SUGGESTIONS' - Much clearer and more actionable than previous format --- src/review_bot.py | 128 +++++++++++++++++++++++----------------------- 1 file changed, 63 insertions(+), 65 deletions(-) diff --git a/src/review_bot.py b/src/review_bot.py index 65d34f5..264c7f3 100644 --- a/src/review_bot.py +++ b/src/review_bot.py @@ -298,80 +298,78 @@ def extract_dependencies_from_diff(patch: str) -> List[str]: return dependencies def generate_human_review(file_name: str, patch: str, issues: List[Dict], cve_vulns: List[Dict]) -> str: - """Generate human-like, concise code review using AI.""" + """Generate human-like, concise code review with checklist format.""" - # Count issues by severity and category - high_issues = [i for i in issues if i['severity'] == 'HIGH'] - medium_issues = [i for i in issues if i['severity'] == 'MEDIUM'] - low_issues = [i for i in issues if i['severity'] == 'LOW'] - - # Group by category + # Group issues by category security_issues = [i for i in issues if i['category'] == 'security'] quality_issues = [i for i in issues if i['category'] == 'code_quality'] performance_issues = [i for i in issues if i['category'] == 'performance'] best_practice_issues = [i for i in issues if i['category'] == 'best_practices'] - # Create issue summary - issue_summary = "" - if high_issues: - issue_summary += f"🚨 **{len(high_issues)} critical issues**\n" - if medium_issues: - issue_summary += f"⚠️ **{len(medium_issues)} issues**\n" - if low_issues: - issue_summary += f"πŸ’‘ **{len(low_issues)} suggestions**\n" - if cve_vulns: - issue_summary += f"πŸ” **{len(cve_vulns)} CVEs in dependencies**\n" + # Create checklist + checklist = [] + + # Security checks + if not security_issues: + checklist.append("βœ… Security - No vulnerabilities found") + else: + critical_security = [i for i in security_issues if i['severity'] == 'HIGH'] + if critical_security: + checklist.append(f"❌ Security - {len(critical_security)} critical issues") + else: + checklist.append(f"⚠️ Security - {len(security_issues)} issues") + + # Code quality checks + if not quality_issues: + checklist.append("βœ… Code Quality - Clean code") + else: + checklist.append(f"⚠️ Code Quality - {len(quality_issues)} issues") + + # Performance checks + if not performance_issues: + checklist.append("βœ… Performance - No bottlenecks") + else: + checklist.append(f"⚠️ Performance - {len(performance_issues)} issues") + + # Best practices checks + if not best_practice_issues: + checklist.append("βœ… Best Practices - Following standards") + else: + checklist.append(f"πŸ’‘ Best Practices - {len(best_practice_issues)} suggestions") + + # CVE checks + if not cve_vulns: + checklist.append("βœ… Dependencies - No known CVEs") + else: + checklist.append(f"πŸ” Dependencies - {len(cve_vulns)} CVEs found") + # Generate overall status + critical_issues = [i for i in issues if i['severity'] == 'HIGH'] if not issues and not cve_vulns: - issue_summary = "βœ… **All good**\n" + overall_status = "All checks passed! πŸŽ‰" + elif critical_issues: + overall_status = f"Critical issues found - {len(critical_issues)} need immediate attention" + elif issues: + overall_status = f"Some issues found - {len(issues)} items to review" + else: + overall_status = "Minor suggestions only" + + # Create the review content + checklist_text = "\n".join(checklist) - # Create detailed issue list (top 3 most critical) + # Add specific issue details if there are critical issues issue_details = "" - for issue in sorted(issues, key=lambda x: ['HIGH', 'MEDIUM', 'LOW'].index(x['severity']))[:3]: - category_emoji = {'security': 'πŸ›‘οΈ', 'code_quality': 'πŸ“', 'performance': '⚑', 'best_practices': '✨'} - emoji = category_emoji.get(issue['category'], 'πŸ“‹') - issue_details += f"β€’ {emoji} **Line {issue['line']}**: {issue['type'].replace('_', ' ').title()}\n" + if critical_issues: + issue_details = "\n\n**Critical Issues:**\n" + for issue in critical_issues[:3]: # Top 3 critical issues + issue_details += f"β€’ Line {issue['line']}: {issue['type'].replace('_', ' ').title()}\n" - for cve in cve_vulns[:2]: # Limit to top 2 CVEs - issue_details += f"β€’ πŸ” **{cve['package']}**: {cve['cve']}\n" + if cve_vulns: + issue_details += "\n**CVEs:**\n" + for cve in cve_vulns[:2]: # Top 2 CVEs + issue_details += f"β€’ {cve['package']}: {cve['cve']}\n" - prompt = f"""You are a senior engineer reviewing PRs. Be EXTREMELY concise and human-like. - -File: {file_name} -Code changes: -{patch[:800]}... - -Review results: -{issue_summary} -{issue_details} - -Provide a VERY brief review: -- If no issues: Just say "Looks good" or "All clear" (2-3 words max) -- If issues: Mention only the most critical issue in 1-2 lines max -- Be conversational, not formal -- Focus on what matters most - -Examples: -- "Looks good πŸ‘" -- "All clear" -- "SQL injection on line 15 - use params" -- "Missing error handling" -- "Performance issue - N+1 query" """ - - try: - chat_completion = client.chat.completions.create( - messages=[ - {"role": "system", "content": "You are a senior engineer. Be EXTREMELY brief. If no issues, just say 'Looks good'. If issues, mention only the most critical problem in 1-2 lines."}, - {"role": "user", "content": prompt} - ], - model="llama-3.3-70b-versatile", - max_tokens=80, - temperature=0.3 - ) - - return chat_completion.choices[0].message.content.strip() - except Exception as e: - return f"πŸ”§ Review completed. {issue_summary}{issue_details}" + return f"{overall_status}\n\n{checklist_text}{issue_details}" def review_code(file_diffs): """Analyze code changes with comprehensive engineering review.""" @@ -409,11 +407,11 @@ def review_code(file_diffs): # Format comment with status badge if not issues and not cve_vulnerabilities: - status = "βœ… GOOD" + status = "βœ… ALL CHECKS PASSED" elif any(i['severity'] == 'HIGH' for i in issues): - status = "🚨 CRITICAL" + status = "🚨 CRITICAL ISSUES" elif any(i['severity'] == 'MEDIUM' for i in issues): - status = "⚠️ ISSUES" + status = "⚠️ ISSUES FOUND" else: status = "πŸ’‘ SUGGESTIONS"