From 9f31f064403e694b65fb4a0038dafaf01cc1ac09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Publio=20Estupi=C3=B1=C3=A1n?= Date: Wed, 3 Sep 2025 16:29:22 -0500 Subject: [PATCH 1/2] Adds support for Azure repositories Extends platform selection to include Azure repositories. Updates URL parsing logic to correctly identify Azure repository owners. --- src/app/[owner]/[repo]/page.tsx | 2 +- src/app/page.tsx | 11 ++++++++++- src/components/ConfigurationModal.tsx | 4 ++-- src/components/ModelSelectionModal.tsx | 4 ++-- src/components/TokenInput.tsx | 14 ++++++++++++-- 5 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/app/[owner]/[repo]/page.tsx b/src/app/[owner]/[repo]/page.tsx index 80e403b8f..aa395f56c 100644 --- a/src/app/[owner]/[repo]/page.tsx +++ b/src/app/[owner]/[repo]/page.tsx @@ -2243,7 +2243,7 @@ IMPORTANT: onApply={confirmRefresh} showWikiType={true} showTokenInput={effectiveRepoInfo.type !== 'local' && !currentToken} // Show token input if not local and no current token - repositoryType={effectiveRepoInfo.type as 'github' | 'gitlab' | 'bitbucket'} + repositoryType={effectiveRepoInfo.type as 'github' | 'gitlab' | 'bitbucket' | 'azure'} authRequired={authRequired} authCode={authCode} setAuthCode={setAuthCode} diff --git a/src/app/page.tsx b/src/app/page.tsx index 9e05a2ef9..d8dce3018 100644 --- a/src/app/page.tsx +++ b/src/app/page.tsx @@ -134,7 +134,7 @@ export default function Home() { const [excludedFiles, setExcludedFiles] = useState(''); const [includedDirs, setIncludedDirs] = useState(''); const [includedFiles, setIncludedFiles] = useState(''); - const [selectedPlatform, setSelectedPlatform] = useState<'github' | 'gitlab' | 'bitbucket'>('github'); + const [selectedPlatform, setSelectedPlatform] = useState<'github' | 'gitlab' | 'bitbucket' | 'azure'>('github'); const [accessToken, setAccessToken] = useState(''); const [error, setError] = useState(null); const [isSubmitting, setIsSubmitting] = useState(false); @@ -212,6 +212,8 @@ export default function Home() { type = 'gitlab'; } else if (domain?.includes('bitbucket.org') || domain?.includes('bitbucket.')) { type = 'bitbucket'; + } else if (domain?.includes('azure.com') || domain?.includes('dev.azure.com')) { + type = 'azure'; } else { type = 'web'; // fallback for other git hosting services } @@ -221,6 +223,13 @@ export default function Home() { if (parts.length >= 2) { repo = parts[parts.length - 1] || ''; owner = parts[parts.length - 2] || ''; + if (type === 'azure') { + const userInfoMatch = input.match(/^(?:https?:\/\/)?([^@\/=\s]+)@/); + const candidateOwner = userInfoMatch?.[1] || parts[0] || ''; + owner = candidateOwner === '_git' ? (parts[0] || '') : candidateOwner; + } else { + owner = parts[parts.length - 2] || ''; + } } } // Unsupported URL formats diff --git a/src/components/ConfigurationModal.tsx b/src/components/ConfigurationModal.tsx index 7a1dae697..2c4476baa 100644 --- a/src/components/ConfigurationModal.tsx +++ b/src/components/ConfigurationModal.tsx @@ -32,8 +32,8 @@ interface ConfigurationModalProps { setCustomModel: (value: string) => void; // Platform selection - selectedPlatform: 'github' | 'gitlab' | 'bitbucket'; - setSelectedPlatform: (value: 'github' | 'gitlab' | 'bitbucket') => void; + selectedPlatform: 'github' | 'gitlab' | 'bitbucket' | 'azure'; + setSelectedPlatform: (value: 'github' | 'gitlab' | 'bitbucket' | 'azure') => void; // Access token accessToken: string; diff --git a/src/components/ModelSelectionModal.tsx b/src/components/ModelSelectionModal.tsx index 5a8ed3fe4..ad1ca9bb9 100644 --- a/src/components/ModelSelectionModal.tsx +++ b/src/components/ModelSelectionModal.tsx @@ -37,7 +37,7 @@ interface ModelSelectionModalProps { // Token input for refresh showTokenInput?: boolean; - repositoryType?: 'github' | 'gitlab' | 'bitbucket'; + repositoryType?: 'github' | 'gitlab' | 'bitbucket' | 'azure'; // Authentication authRequired?: boolean; authCode?: string; @@ -91,7 +91,7 @@ export default function ModelSelectionModal({ // Token input state const [localAccessToken, setLocalAccessToken] = useState(''); - const [localSelectedPlatform, setLocalSelectedPlatform] = useState<'github' | 'gitlab' | 'bitbucket'>(repositoryType); + const [localSelectedPlatform, setLocalSelectedPlatform] = useState<'github' | 'gitlab' | 'bitbucket' | 'azure'>(repositoryType); const [showTokenSection, setShowTokenSection] = useState(showTokenInput); // Reset local state when modal is opened diff --git a/src/components/TokenInput.tsx b/src/components/TokenInput.tsx index 14fadcd8d..1ab1314af 100644 --- a/src/components/TokenInput.tsx +++ b/src/components/TokenInput.tsx @@ -4,8 +4,8 @@ import React from 'react'; import { useLanguage } from '@/contexts/LanguageContext'; interface TokenInputProps { - selectedPlatform: 'github' | 'gitlab' | 'bitbucket'; - setSelectedPlatform: (value: 'github' | 'gitlab' | 'bitbucket') => void; + selectedPlatform: 'github' | 'gitlab' | 'bitbucket' | 'azure'; + setSelectedPlatform: (value: 'github' | 'gitlab' | 'bitbucket' | 'azure') => void; accessToken: string; setAccessToken: (value: string) => void; showTokenSection?: boolean; @@ -76,6 +76,16 @@ export default function TokenInput({ > Bitbucket + )} From 90d1df0a07676b874b6c5f47a9ab82b1523bfe5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Publio=20Estupi=C3=B1=C3=A1n?= Date: Thu, 4 Sep 2025 13:56:43 -0500 Subject: [PATCH 2/2] Adds branch support for repository ingestion Extends repository ingestion capabilities to support specifying a branch, allowing users to target specific branches for cloning and file content retrieval. This enables users to work with specific versions of codebases without relying solely on the default branch. It improves the flexibility and precision of repository analysis. --- api/data_pipeline.py | 148 +++++++++++++++----------- api/rag.py | 6 +- api/websocket_wiki.py | 7 +- src/app/[owner]/[repo]/page.tsx | 56 ++++++---- src/app/page.tsx | 10 ++ src/components/ConfigurationModal.tsx | 29 +++++ src/messages/en.json | 2 + src/messages/es.json | 2 + src/messages/fr.json | 2 + src/messages/ja.json | 2 + src/messages/kr.json | 2 + src/messages/pt-br.json | 2 + src/messages/ru.json | 2 + src/messages/vi.json | 2 + src/messages/zh-tw.json | 2 + src/messages/zh.json | 2 + src/types/repoinfo.tsx | 1 + 17 files changed, 189 insertions(+), 88 deletions(-) diff --git a/api/data_pipeline.py b/api/data_pipeline.py index a8c0b6610..26dba91ed 100644 --- a/api/data_pipeline.py +++ b/api/data_pipeline.py @@ -55,7 +55,7 @@ def count_tokens(text: str, is_ollama_embedder: bool = None) -> int: # Rough approximation: 4 characters per token return len(text) // 4 -def download_repo(repo_url: str, local_path: str, type: str = "github", access_token: str = None) -> str: +def download_repo(repo_url: str, local_path: str, type: str = "github", access_token: str = None, branch: str = None) -> str: """ Downloads a Git repository (GitHub, GitLab, or Bitbucket) to a specified local path. @@ -63,6 +63,7 @@ def download_repo(repo_url: str, local_path: str, type: str = "github", access_t repo_url (str): The URL of the Git repository to clone. local_path (str): The local directory where the repository will be cloned. access_token (str, optional): Access token for private repositories. + branch (str, optional): Branch to clone. Returns: str: The output message from the `git` command. @@ -105,10 +106,14 @@ def download_repo(repo_url: str, local_path: str, type: str = "github", access_t logger.info("Using access token for authentication") # Clone the repository - logger.info(f"Cloning repository from {repo_url} to {local_path}") + logger.info(f"Cloning repository from {repo_url} to {local_path} (branch: {branch or 'default'})") # We use repo_url in the log to avoid exposing the token in logs + clone_cmd = ["git", "clone", "--depth=1", "--single-branch"] + if branch: + clone_cmd += ["--branch", branch] + clone_cmd += [clone_url, local_path] result = subprocess.run( - ["git", "clone", "--depth=1", "--single-branch", clone_url, local_path], + clone_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, @@ -413,7 +418,7 @@ def transform_documents_and_save_to_db( db.save_state(filepath=db_path) return db -def get_github_file_content(repo_url: str, file_path: str, access_token: str = None) -> str: +def get_github_file_content(repo_url: str, file_path: str, access_token: str = None, branch: str = None) -> str: """ Retrieves the content of a file from a GitHub repository using the GitHub API. Supports both public GitHub (github.com) and GitHub Enterprise (custom domains). @@ -423,6 +428,7 @@ def get_github_file_content(repo_url: str, file_path: str, access_token: str = N (e.g., "https://github.com/username/repo" or "https://github.company.com/username/repo") file_path (str): The path to the file within the repository (e.g., "src/main.py") access_token (str, optional): GitHub personal access token for private repositories + branch (str, optional): Branch name or tag to fetch from Returns: str: The content of the file as a string @@ -455,6 +461,8 @@ def get_github_file_content(repo_url: str, file_path: str, access_token: str = N # Use GitHub API to get file content # The API endpoint for getting file content is: /repos/{owner}/{repo}/contents/{path} api_url = f"{api_base}/repos/{owner}/{repo}/contents/{file_path}" + if branch: + api_url += f"?ref={branch}" # Fetch file content from GitHub API headers = {} @@ -490,7 +498,7 @@ def get_github_file_content(repo_url: str, file_path: str, access_token: str = N except Exception as e: raise ValueError(f"Failed to get file content: {str(e)}") -def get_gitlab_file_content(repo_url: str, file_path: str, access_token: str = None) -> str: +def get_gitlab_file_content(repo_url: str, file_path: str, access_token: str = None, branch: str = None) -> str: """ Retrieves the content of a file from a GitLab repository (cloud or self-hosted). @@ -498,6 +506,7 @@ def get_gitlab_file_content(repo_url: str, file_path: str, access_token: str = N repo_url (str): The GitLab repo URL (e.g., "https://gitlab.com/username/repo" or "http://localhost/group/project") file_path (str): File path within the repository (e.g., "src/main.py") access_token (str, optional): GitLab personal access token + branch (str, optional): Branch name or tag to fetch from Returns: str: File content @@ -525,27 +534,29 @@ def get_gitlab_file_content(repo_url: str, file_path: str, access_token: str = N # Encode file path encoded_file_path = quote(file_path, safe='') - # Try to get the default branch from the project info - default_branch = None - try: - project_info_url = f"{gitlab_domain}/api/v4/projects/{encoded_project_path}" - project_headers = {} - if access_token: - project_headers["PRIVATE-TOKEN"] = access_token - - project_response = requests.get(project_info_url, headers=project_headers) - if project_response.status_code == 200: - project_data = project_response.json() - default_branch = project_data.get('default_branch', 'main') - logger.info(f"Found default branch: {default_branch}") - else: - logger.warning(f"Could not fetch project info, using 'main' as default branch") - default_branch = 'main' - except Exception as e: - logger.warning(f"Error fetching project info: {e}, using 'main' as default branch") - default_branch = 'main' + # Determine branch to use + ref_to_use = branch + if not ref_to_use: + # Try to get the default branch from the project info + try: + project_info_url = f"{gitlab_domain}/api/v4/projects/{encoded_project_path}" + project_headers = {} + if access_token: + project_headers["PRIVATE-TOKEN"] = access_token + + project_response = requests.get(project_info_url, headers=project_headers) + if project_response.status_code == 200: + project_data = project_response.json() + ref_to_use = project_data.get('default_branch', 'main') + logger.info(f"Found default branch: {ref_to_use}") + else: + logger.warning(f"Could not fetch project info, using 'main' as default branch") + ref_to_use = 'main' + except Exception as e: + logger.warning(f"Error fetching project info: {e}, using 'main' as default branch") + ref_to_use = 'main' - api_url = f"{gitlab_domain}/api/v4/projects/{encoded_project_path}/repository/files/{encoded_file_path}/raw?ref={default_branch}" + api_url = f"{gitlab_domain}/api/v4/projects/{encoded_project_path}/repository/files/{encoded_file_path}/raw?ref={ref_to_use}" # Fetch file content from GitLab API headers = {} if access_token: @@ -572,7 +583,7 @@ def get_gitlab_file_content(repo_url: str, file_path: str, access_token: str = N except Exception as e: raise ValueError(f"Failed to get file content: {str(e)}") -def get_bitbucket_file_content(repo_url: str, file_path: str, access_token: str = None) -> str: +def get_bitbucket_file_content(repo_url: str, file_path: str, access_token: str = None, branch: str = None) -> str: """ Retrieves the content of a file from a Bitbucket repository using the Bitbucket API. @@ -580,6 +591,7 @@ def get_bitbucket_file_content(repo_url: str, file_path: str, access_token: str repo_url (str): The URL of the Bitbucket repository (e.g., "https://bitbucket.org/username/repo") file_path (str): The path to the file within the repository (e.g., "src/main.py") access_token (str, optional): Bitbucket personal access token for private repositories + branch (str, optional): Branch name or tag to fetch from Returns: str: The content of the file as a string @@ -596,29 +608,31 @@ def get_bitbucket_file_content(repo_url: str, file_path: str, access_token: str owner = parts[-2] repo = parts[-1].replace(".git", "") - # Try to get the default branch from the repository info - default_branch = None - try: - repo_info_url = f"https://api.bitbucket.org/2.0/repositories/{owner}/{repo}" - repo_headers = {} - if access_token: - repo_headers["Authorization"] = f"Bearer {access_token}" - - repo_response = requests.get(repo_info_url, headers=repo_headers) - if repo_response.status_code == 200: - repo_data = repo_response.json() - default_branch = repo_data.get('mainbranch', {}).get('name', 'main') - logger.info(f"Found default branch: {default_branch}") - else: - logger.warning(f"Could not fetch repository info, using 'main' as default branch") - default_branch = 'main' - except Exception as e: - logger.warning(f"Error fetching repository info: {e}, using 'main' as default branch") - default_branch = 'main' + # Determine branch to use + branch_to_use = branch + if not branch_to_use: + # Try to get the default branch from the repository info + try: + repo_info_url = f"https://api.bitbucket.org/2.0/repositories/{owner}/{repo}" + repo_headers = {} + if access_token: + repo_headers["Authorization"] = f"Bearer {access_token}" + + repo_response = requests.get(repo_info_url, headers=repo_headers) + if repo_response.status_code == 200: + repo_data = repo_response.json() + branch_to_use = repo_data.get('mainbranch', {}).get('name', 'main') + logger.info(f"Found default branch: {branch_to_use}") + else: + logger.warning(f"Could not fetch repository info, using 'main' as default branch") + branch_to_use = 'main' + except Exception as e: + logger.warning(f"Error fetching repository info: {e}, using 'main' as default branch") + branch_to_use = 'main' # Use Bitbucket API to get file content # The API endpoint for getting file content is: /2.0/repositories/{owner}/{repo}/src/{branch}/{path} - api_url = f"https://api.bitbucket.org/2.0/repositories/{owner}/{repo}/src/{default_branch}/{file_path}" + api_url = f"https://api.bitbucket.org/2.0/repositories/{owner}/{repo}/src/{branch_to_use}/{file_path}" # Fetch file content from Bitbucket API headers = {} @@ -648,14 +662,15 @@ def get_bitbucket_file_content(repo_url: str, file_path: str, access_token: str raise ValueError(f"Failed to get file content: {str(e)}") -def get_file_content(repo_url: str, file_path: str, type: str = "github", access_token: str = None) -> str: +def get_file_content(repo_url: str, file_path: str, type: str = "github", access_token: str = None, branch: str = None) -> str: """ - Retrieves the content of a file from a Git repository (GitHub or GitLab). + Retrieves the content of a file from a Git repository (GitHub, GitLab, or Bitbucket). Args: repo_url (str): The URL of the repository file_path (str): The path to the file within the repository access_token (str, optional): Access token for private repositories + branch (str, optional): Branch name to read from Returns: str: The content of the file as a string @@ -664,13 +679,13 @@ def get_file_content(repo_url: str, file_path: str, type: str = "github", access ValueError: If the file cannot be fetched or if the URL is not valid """ if type == "github": - return get_github_file_content(repo_url, file_path, access_token) + return get_github_file_content(repo_url, file_path, access_token, branch) elif type == "gitlab": - return get_gitlab_file_content(repo_url, file_path, access_token) + return get_gitlab_file_content(repo_url, file_path, access_token, branch) elif type == "bitbucket": - return get_bitbucket_file_content(repo_url, file_path, access_token) + return get_bitbucket_file_content(repo_url, file_path, access_token, branch) else: - raise ValueError("Unsupported repository URL. Only GitHub and GitLab are supported.") + raise ValueError("Unsupported repository URL. Only GitHub, GitLab and Bitbucket are supported.") class DatabaseManager: """ @@ -684,7 +699,7 @@ def __init__(self): def prepare_database(self, repo_url_or_path: str, type: str = "github", access_token: str = None, is_ollama_embedder: bool = None, excluded_dirs: List[str] = None, excluded_files: List[str] = None, - included_dirs: List[str] = None, included_files: List[str] = None) -> List[Document]: + included_dirs: List[str] = None, included_files: List[str] = None, branch: str = None) -> List[Document]: """ Create a new database from the repository. @@ -702,7 +717,7 @@ def prepare_database(self, repo_url_or_path: str, type: str = "github", access_t List[Document]: List of Document objects """ self.reset_database() - self._create_repo(repo_url_or_path, type, access_token) + self._create_repo(repo_url_or_path, type, access_token, branch) return self.prepare_db_index(is_ollama_embedder=is_ollama_embedder, excluded_dirs=excluded_dirs, excluded_files=excluded_files, included_dirs=included_dirs, included_files=included_files) @@ -714,7 +729,7 @@ def reset_database(self): self.repo_url_or_path = None self.repo_paths = None - def _extract_repo_name_from_url(self, repo_url_or_path: str, repo_type: str) -> str: + def _extract_repo_name_from_url(self, repo_url_or_path: str, repo_type: str, branch: str = None) -> str: # Extract owner and repo name to create unique identifier url_parts = repo_url_or_path.rstrip('/').split('/') @@ -727,18 +742,23 @@ def _extract_repo_name_from_url(self, repo_url_or_path: str, repo_type: str) -> repo_name = f"{owner}_{repo}" else: repo_name = url_parts[-1].replace(".git", "") + # Append branch suffix if provided + if branch: + safe_branch = re.sub(r"[^A-Za-z0-9._-]+", "_", branch) + repo_name = f"{repo_name}__{safe_branch}" return repo_name - def _create_repo(self, repo_url_or_path: str, repo_type: str = "github", access_token: str = None) -> None: + def _create_repo(self, repo_url_or_path: str, repo_type: str = "github", access_token: str = None, branch: str = None) -> None: """ Download and prepare all paths. Paths: - ~/.adalflow/repos/{owner}_{repo_name} (for url, local path will be the same) - ~/.adalflow/databases/{owner}_{repo_name}.pkl + ~/.adalflow/repos/{owner}_{repo_name}[__branch] (for url, local path will be the same) + ~/.adalflow/databases/{owner}_{repo_name}[__branch].pkl Args: repo_url_or_path (str): The URL or local path of the repository access_token (str, optional): Access token for private repositories + branch (str, optional): Branch to use """ logger.info(f"Preparing repo storage for {repo_url_or_path}...") @@ -749,7 +769,7 @@ def _create_repo(self, repo_url_or_path: str, repo_type: str = "github", access_ # url if repo_url_or_path.startswith("https://") or repo_url_or_path.startswith("http://"): # Extract the repository name from the URL - repo_name = self._extract_repo_name_from_url(repo_url_or_path, repo_type) + repo_name = self._extract_repo_name_from_url(repo_url_or_path, repo_type, branch) logger.info(f"Extracted repo name: {repo_name}") save_repo_dir = os.path.join(root_path, "repos", repo_name) @@ -757,11 +777,14 @@ def _create_repo(self, repo_url_or_path: str, repo_type: str = "github", access_ # Check if the repository directory already exists and is not empty if not (os.path.exists(save_repo_dir) and os.listdir(save_repo_dir)): # Only download if the repository doesn't exist or is empty - download_repo(repo_url_or_path, save_repo_dir, repo_type, access_token) + download_repo(repo_url_or_path, save_repo_dir, repo_type, access_token, branch) else: logger.info(f"Repository already exists at {save_repo_dir}. Using existing repository.") else: # local path repo_name = os.path.basename(repo_url_or_path) + if branch: + safe_branch = re.sub(r"[^A-Za-z0-9._-]+", "_", branch) + repo_name = f"{repo_name}__{safe_branch}" save_repo_dir = repo_url_or_path save_db_file = os.path.join(root_path, "databases", f"{repo_name}.pkl") @@ -826,7 +849,7 @@ def prepare_db_index(self, is_ollama_embedder: bool = None, excluded_dirs: List[ logger.info(f"Total transformed documents: {len(transformed_docs)}") return transformed_docs - def prepare_retriever(self, repo_url_or_path: str, type: str = "github", access_token: str = None): + def prepare_retriever(self, repo_url_or_path: str, type: str = "github", access_token: str = None, branch: str = None): """ Prepare the retriever for a repository. This is a compatibility method for the isolated API. @@ -834,8 +857,9 @@ def prepare_retriever(self, repo_url_or_path: str, type: str = "github", access_ Args: repo_url_or_path (str): The URL or local path of the repository access_token (str, optional): Access token for private repositories + branch (str, optional): Branch to use Returns: List[Document]: List of Document objects """ - return self.prepare_database(repo_url_or_path, type, access_token) + return self.prepare_database(repo_url_or_path, type, access_token, branch=branch) diff --git a/api/rag.py b/api/rag.py index 3ff916988..8d194212a 100644 --- a/api/rag.py +++ b/api/rag.py @@ -343,7 +343,7 @@ def _validate_and_filter_embeddings(self, documents: List) -> List: def prepare_retriever(self, repo_url_or_path: str, type: str = "github", access_token: str = None, excluded_dirs: List[str] = None, excluded_files: List[str] = None, - included_dirs: List[str] = None, included_files: List[str] = None): + included_dirs: List[str] = None, included_files: List[str] = None, branch: str = None): """ Prepare the retriever for a repository. Will load database from local storage if available. @@ -355,6 +355,7 @@ def prepare_retriever(self, repo_url_or_path: str, type: str = "github", access_ excluded_files: Optional list of file patterns to exclude from processing included_dirs: Optional list of directories to include exclusively included_files: Optional list of file patterns to include exclusively + branch: Optional branch to use for cloning and indexing """ self.initialize_db_manager() self.repo_url_or_path = repo_url_or_path @@ -366,7 +367,8 @@ def prepare_retriever(self, repo_url_or_path: str, type: str = "github", access_ excluded_dirs=excluded_dirs, excluded_files=excluded_files, included_dirs=included_dirs, - included_files=included_files + included_files=included_files, + branch=branch ) logger.info(f"Loaded {len(self.transformed_docs)} documents for retrieval") diff --git a/api/websocket_wiki.py b/api/websocket_wiki.py index 2a7cce9e3..05d2c701d 100644 --- a/api/websocket_wiki.py +++ b/api/websocket_wiki.py @@ -38,6 +38,7 @@ class ChatCompletionRequest(BaseModel): filePath: Optional[str] = Field(None, description="Optional path to a file in the repository to include in the prompt") token: Optional[str] = Field(None, description="Personal access token for private repositories") type: Optional[str] = Field("github", description="Type of repository (e.g., 'github', 'gitlab', 'bitbucket')") + branch: Optional[str] = Field(None, description="Branch name to use for retrieval and file content") # model parameters provider: str = Field("google", description="Model provider (google, openai, openrouter, ollama, azure)") @@ -95,7 +96,7 @@ async def handle_websocket_chat(websocket: WebSocket): included_files = [unquote(file_pattern) for file_pattern in request.included_files.split('\n') if file_pattern.strip()] logger.info(f"Using custom included files: {included_files}") - request_rag.prepare_retriever(request.repo_url, request.type, request.token, excluded_dirs, excluded_files, included_dirs, included_files) + request_rag.prepare_retriever(request.repo_url, request.type, request.token, excluded_dirs, excluded_files, included_dirs, included_files, branch=request.branch) logger.info(f"Retriever prepared for {request.repo_url}") except ValueError as e: if "No valid documents with embeddings found" in str(e): @@ -391,8 +392,8 @@ async def handle_websocket_chat(websocket: WebSocket): file_content = "" if request.filePath: try: - file_content = get_file_content(request.repo_url, request.filePath, request.type, request.token) - logger.info(f"Successfully retrieved content for file: {request.filePath}") + file_content = get_file_content(request.repo_url, request.filePath, request.type, request.token, request.branch) + logger.info(f"Successfully retrieved content for file: {request.filePath} (branch: {request.branch or 'default'})") except Exception as e: logger.error(f"Error retrieving file content: {str(e)}") # Continue without file content if there's an error diff --git a/src/app/[owner]/[repo]/page.tsx b/src/app/[owner]/[repo]/page.tsx index 80e403b8f..5a2362a20 100644 --- a/src/app/[owner]/[repo]/page.tsx +++ b/src/app/[owner]/[repo]/page.tsx @@ -106,7 +106,8 @@ const addTokensToRequestBody = ( excludedDirs?: string, excludedFiles?: string, includedDirs?: string, - includedFiles?: string + includedFiles?: string, + branch?: string ): void => { if (token !== '') { requestBody.token = token; @@ -134,7 +135,9 @@ const addTokensToRequestBody = ( if (includedFiles) { requestBody.included_files = includedFiles; } - + if (branch) { + requestBody.branch = branch; + } }; const createGithubHeaders = (githubToken: string): HeadersInit => { @@ -199,6 +202,7 @@ export default function RepoWikiPage() { : repoUrl?.includes('github.com') ? 'github' : searchParams.get('type') || 'github'; + const branch = searchParams.get('branch') || ''; // Import language context for translations const { messages } = useLanguage(); @@ -210,8 +214,9 @@ export default function RepoWikiPage() { type: repoType, token: token || null, localPath: localPath || null, - repoUrl: repoUrl || null - }), [owner, repo, repoType, localPath, repoUrl, token]); + repoUrl: repoUrl || null, + branch: branch || null, + }), [owner, repo, repoType, localPath, repoUrl, token, branch]); // State variables const [isLoading, setIsLoading] = useState(true); @@ -511,7 +516,7 @@ Remember: }; // Add tokens if available - addTokensToRequestBody(requestBody, currentToken, effectiveRepoInfo.type, selectedProviderState, selectedModelState, isCustomSelectedModelState, customSelectedModelState, language, modelExcludedDirs, modelExcludedFiles, modelIncludedDirs, modelIncludedFiles); + addTokensToRequestBody(requestBody, currentToken, effectiveRepoInfo.type, selectedProviderState, selectedModelState, isCustomSelectedModelState, customSelectedModelState, language, modelExcludedDirs, modelExcludedFiles, modelIncludedDirs, modelIncludedFiles, branch); // Use WebSocket for communication let content = ''; @@ -808,7 +813,7 @@ IMPORTANT: }; // Add tokens if available - addTokensToRequestBody(requestBody, currentToken, effectiveRepoInfo.type, selectedProviderState, selectedModelState, isCustomSelectedModelState, customSelectedModelState, language, modelExcludedDirs, modelExcludedFiles, modelIncludedDirs, modelIncludedFiles); + addTokensToRequestBody(requestBody, currentToken, effectiveRepoInfo.type, selectedProviderState, selectedModelState, isCustomSelectedModelState, customSelectedModelState, language, modelExcludedDirs, modelExcludedFiles, modelIncludedDirs, modelIncludedFiles, branch); // Use WebSocket for communication let responseText = ''; @@ -1211,22 +1216,31 @@ IMPORTANT: }; const githubApiBaseUrl = getGithubApiUrl(effectiveRepoInfo.repoUrl); - // First, try to get the default branch from the repository info - let defaultBranchLocal = null; - try { - const repoInfoResponse = await fetch(`${githubApiBaseUrl}/repos/${owner}/${repo}`, { - headers: createGithubHeaders(currentToken) - }); - - if (repoInfoResponse.ok) { - const repoData = await repoInfoResponse.json(); - defaultBranchLocal = repoData.default_branch; - console.log(`Found default branch: ${defaultBranchLocal}`); - // Store the default branch in state - setDefaultBranch(defaultBranchLocal || 'main'); + // First, try to get the default branch from effectiveRepoInfo or the repository info + let defaultBranchLocal = effectiveRepoInfo?.branch && `${effectiveRepoInfo.branch}`.trim() !== '' + ? `${effectiveRepoInfo.branch}`.trim() + : null; + + // If branch not provided, fetch repository info to determine default branch + if (!defaultBranchLocal) { + try { + const repoInfoResponse = await fetch(`${githubApiBaseUrl}/repos/${owner}/${repo}`, { + headers: createGithubHeaders(currentToken) + }); + + if (repoInfoResponse.ok) { + const repoData = await repoInfoResponse.json(); + defaultBranchLocal = repoData.default_branch; + console.log(`Found default branch: ${defaultBranchLocal}`); + // Store the default branch in state + setDefaultBranch(defaultBranchLocal || 'main'); + } + } catch (err) { + console.warn('Could not fetch repository info for default branch:', err); } - } catch (err) { - console.warn('Could not fetch repository info for default branch:', err); + } else { + // If we already have a branch from effectiveRepoInfo, store it + setDefaultBranch(defaultBranchLocal); } // Create list of branches to try, prioritizing the actual default branch diff --git a/src/app/page.tsx b/src/app/page.tsx index 9e05a2ef9..0d385233f 100644 --- a/src/app/page.tsx +++ b/src/app/page.tsx @@ -76,6 +76,7 @@ export default function Home() { }; const [repositoryInput, setRepositoryInput] = useState('https://github.com/AsyncFuncAI/deepwiki-open'); + const [branch, setBranch] = useState(''); const REPO_CONFIG_CACHE_KEY = 'deepwikiRepoConfigCache'; @@ -98,6 +99,7 @@ export default function Home() { setExcludedFiles(config.excludedFiles || ''); setIncludedDirs(config.includedDirs || ''); setIncludedFiles(config.includedFiles || ''); + setBranch(config.branch || ''); } } } catch (error) { @@ -322,6 +324,7 @@ export default function Home() { excludedFiles, includedDirs, includedFiles, + branch, }; existingConfigs[currentRepoUrl] = configToSave; localStorage.setItem(REPO_CONFIG_CACHE_KEY, JSON.stringify(existingConfigs)); @@ -376,6 +379,11 @@ export default function Home() { params.append('included_files', includedFiles); } + // Add branch parameter if provided + if (branch && branch.trim() !== '') { + params.append('branch', branch.trim()); + } + // Add language parameter params.append('language', selectedLanguage); @@ -445,6 +453,8 @@ export default function Home() { isOpen={isConfigModalOpen} onClose={() => setIsConfigModalOpen(false)} repositoryInput={repositoryInput} + branch={branch} + setBranch={setBranch} selectedLanguage={selectedLanguage} setSelectedLanguage={setSelectedLanguage} supportedLanguages={supportedLanguages} diff --git a/src/components/ConfigurationModal.tsx b/src/components/ConfigurationModal.tsx index 7a1dae697..b237349a4 100644 --- a/src/components/ConfigurationModal.tsx +++ b/src/components/ConfigurationModal.tsx @@ -12,6 +12,10 @@ interface ConfigurationModalProps { // Repository input repositoryInput: string; + // Branch input + branch: string; + setBranch: (value: string) => void; + // Language selection selectedLanguage: string; setSelectedLanguage: (value: string) => void; @@ -64,6 +68,8 @@ export default function ConfigurationModal({ isOpen, onClose, repositoryInput, + branch, + setBranch, selectedLanguage, setSelectedLanguage, supportedLanguages, @@ -135,6 +141,29 @@ export default function ConfigurationModal({ + {/* Branch input */} +
+ + setBranch(e.target.value)} + placeholder="e.g., main, master, develop" + className="input-japanese block w-full px-3 py-2 text-sm rounded-md bg-transparent text-[var(--foreground)] focus:outline-none focus:border-[var(--accent-primary)]" + /> +
+ + + + + {t.form?.branchHelp || "If left empty, the repository's default branch will be used."} + +
+
+ {/* Language selection */}