diff --git a/.gitignore b/.gitignore
index c6076f1af..f38377e79 100644
--- a/.gitignore
+++ b/.gitignore
@@ -275,3 +275,7 @@ website/.docusaurus
 
 # Generated from testing
 /test/fixtures/test-package/package-lock.json
+
+# Benchmark results
+benchmark-detailed-*/
+results*.csv
diff --git a/config.schema.json b/config.schema.json
index dafb93c3f..f299eb284 100644
--- a/config.schema.json
+++ b/config.schema.json
@@ -357,6 +357,26 @@
         }
       }
     }
+  },
+  "cache": {
+    "description": "Configuration for bare repository cache (hybrid cache system)",
+    "type": "object",
+    "properties": {
+      "maxSizeGB": {
+        "type": "number",
+        "description": "Maximum cache size in gigabytes (default 2GB)"
+      },
+      "maxRepositories": {
+        "type": "number",
+        "description": "Maximum number of repositories in cache (default 50)"
+      },
+      "cacheDir": {
+        "type": "string",
+        "description": "Directory path for bare repository cache (default ./.remote/cache)"
+      }
+    },
+    "required": ["maxSizeGB", "maxRepositories", "cacheDir"],
+    "additionalProperties": false
   }
 },
 "definitions": {
diff --git a/package.json b/package.json
index 56c5679dd..1a7187dd3 100644
--- a/package.json
+++ b/package.json
@@ -93,7 +93,6 @@
     "express-rate-limit": "^8.1.0",
     "express-session": "^1.18.2",
     "history": "5.3.0",
-    "isomorphic-git": "^1.34.0",
     "jsonwebtoken": "^9.0.2",
     "jwk-to-pem": "^2.0.7",
     "load-plugin": "^6.0.3",
diff --git a/proxy.config.json b/proxy.config.json
index a57d51da8..31023e745 100644
--- a/proxy.config.json
+++ b/proxy.config.json
@@ -178,5 +178,10 @@
         "loginRequired": true
       }
     ]
+  },
+  "cache": {
+    "maxSizeGB": 2,
+    "maxRepositories": 50,
+    "cacheDir": "./.remote/cache"
+  }
 }
diff --git a/scripts/cache-benchmark.sh b/scripts/cache-benchmark.sh
new file mode 100755
index 000000000..6144cc874
--- /dev/null
+++ b/scripts/cache-benchmark.sh
@@ -0,0 +1,173 @@
+#!/bin/bash
+
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+CYAN='\033[0;36m'
+BOLD='\033[1m'
+NC='\033[0m'
+
+echo -e "${BOLD}${BLUE}═══════════════════════════════════════════════════════════${NC}"
+echo -e "${BOLD}${BLUE}  Git Proxy Hybrid Cache - Detailed Performance Benchmark${NC}"
+echo -e "${BOLD}${BLUE}═══════════════════════════════════════════════════════════${NC}"
+echo ""
+
+PROXY_URL="http://localhost:8000"
+GITHUB_REPO="${1}"
+TEST_BRANCH="${2:-main}"
+NUM_PUSHES="${3:-10}"
+
+if [ -z "$GITHUB_REPO" ]; then
+  echo -e "${RED}ERROR: GitHub repository required${NC}"
+  echo ""
+  echo "Usage: $0 <owner/repo> [branch] [num_pushes]"
+  echo "Example: $0 yourFork/backstage main 10"
+  echo ""
+  echo -e "${YELLOW}Note: You must have push access to the specified repository${NC}"
+  exit 1
+fi
+
+PROXY_REPO_URL="$PROXY_URL/github.com/$GITHUB_REPO.git"
+
+echo -e "${CYAN}Configuration:${NC}"
+echo "  Proxy URL: $PROXY_URL"
+echo "  GitHub Repo: $GITHUB_REPO"
+echo "  Branch: $TEST_BRANCH"
+echo "  Number of pushes: $NUM_PUSHES (1 cold + $((NUM_PUSHES-1)) warm)"
+echo ""
+
+echo -e "${YELLOW}[1/5] Checking git-proxy status...${NC}"
+if ! 
curl -s "$PROXY_URL" > /dev/null 2>&1; then + echo -e "${RED}✗ ERROR: git-proxy not running on $PROXY_URL${NC}" + exit 1 +fi +echo -e "${GREEN}✓ Git-proxy is running${NC}\n" + +echo -e "${YELLOW}[2/5] Retrieving GitHub credentials...${NC}" +CREDENTIALS=$(echo -e "protocol=https\nhost=github.com\n" | git credential fill 2>/dev/null) +if [ -z "$CREDENTIALS" ]; then + echo -e "${RED}✗ ERROR: No GitHub credentials found${NC}" + exit 1 +fi + +GITHUB_USERNAME=$(echo "$CREDENTIALS" | grep "^username=" | cut -d= -f2) +GITHUB_TOKEN=$(echo "$CREDENTIALS" | grep "^password=" | cut -d= -f2) +GITHUB_EMAIL=$(git config --global user.email || echo "$GITHUB_USERNAME@users.noreply.github.com") +echo -e "${GREEN}✓ Credentials retrieved for: $GITHUB_USERNAME${NC}\n" + +TEST_DIR="./benchmark-detailed-$(date +%s)" +echo -e "${YELLOW}[3/5] Setting up test environment...${NC}" +mkdir -p "$TEST_DIR" && cd "$TEST_DIR" +REPO_NAME=$(basename "$GITHUB_REPO") + +echo " → Clearing cache..." +rm -rf ../.remote/cache/* ../.remote/work/* 2>/dev/null || true +echo -e "${GREEN}✓ Cache cleared${NC}\n" + +echo -e "${YELLOW}[4/5] Performing initial clone (one-time operation)...${NC}" +echo -e "${CYAN}→ Cloning $GITHUB_REPO via proxy...${NC}\n" +START_INITIAL_CLONE=$(date +%s.%N) +git clone "$PROXY_REPO_URL" "$REPO_NAME" +CLONE_EXIT_CODE=$? +END_INITIAL_CLONE=$(date +%s.%N) + +INITIAL_CLONE_TIME=$(echo "$END_INITIAL_CLONE - $START_INITIAL_CLONE" | bc) + +cd "$REPO_NAME" +git config user.email "$GITHUB_EMAIL" +git config user.name "$GITHUB_USERNAME" +echo -e "${GREEN}✓ Initial clone completed in ${INITIAL_CLONE_TIME}s${NC}\n" + +RESULTS_FILE="../results-detailed.csv" +echo "push_number,is_cold,push_time_s" > "$RESULTS_FILE" + +perform_push() { + local push_num=$1 + local is_cold=$2 + local label=$([ "$is_cold" = "true" ] && echo "COLD CACHE" || echo "WARM CACHE") + + echo -e "${BLUE}═══ Push #$push_num ($label) ═══${NC}" + + local commit_file="benchmark-push-$push_num-$(date +%s).txt" + echo "Benchmark push $push_num at $(date)" > "$commit_file" + git add "$commit_file" > /dev/null 2>&1 + git commit -m "Benchmark push #$push_num" > /dev/null 2>&1 + + echo -n " Pushing... " + START_PUSH=$(date +%s.%N) + PUSH_OUTPUT=$(git -c credential.helper="!f() { echo username=$GITHUB_USERNAME; echo password=$GITHUB_TOKEN; }; f" \ + push "$PROXY_REPO_URL" "HEAD:refs/heads/benchmark-test-$push_num" 2>&1) + PUSH_EXIT_CODE=$? + END_PUSH=$(date +%s.%N) + PUSH_TIME=$(echo "$END_PUSH - $START_PUSH" | bc) + + if [ $PUSH_EXIT_CODE -ne 0 ]; then + echo -e "${RED}✗ FAILED${NC}" + echo "$PUSH_OUTPUT" + echo "" + exit 1 + fi + + echo -e "${GREEN}✓ ${PUSH_TIME}s${NC}" + echo "$push_num,$is_cold,$PUSH_TIME" >> "$RESULTS_FILE" + echo "" +} + +echo -e "${YELLOW}[5/5] Running push benchmark...${NC}\n" + +perform_push 1 true +for i in $(seq 2 $NUM_PUSHES); do + perform_push $i false +done + +cd .. 
+ +echo -e "${BOLD}${BLUE}═══════════════════════════════════════════════════════════${NC}" +echo -e "${BOLD}${BLUE} Performance Analysis ${NC}" +echo -e "${BOLD}${BLUE}═══════════════════════════════════════════════════════════${NC}\n" + +RESULTS_CSV="results-detailed.csv" +COLD_TIME=$(awk -F, 'NR==2 {print $3}' "$RESULTS_CSV") +WARM_TIMES=$(awk -F, 'NR>2 {print $3}' "$RESULTS_CSV") + +WARM_MIN=$(echo "$WARM_TIMES" | sort -n | head -1) +WARM_MAX=$(echo "$WARM_TIMES" | sort -n | tail -1) +WARM_AVG=$(echo "$WARM_TIMES" | awk '{sum+=$1; count++} END {print sum/count}') +WARM_COUNT=$(echo "$WARM_TIMES" | wc -l | tr -d ' ') +WARM_STDDEV=$(echo "$WARM_TIMES" | awk -v avg="$WARM_AVG" '{sum+=($1-avg)^2; count++} END {print sqrt(sum/count)}') + +SPEEDUP=$(echo "scale=2; $COLD_TIME / $WARM_AVG" | bc) +IMPROVEMENT=$(echo "scale=1; (1 - $WARM_AVG / $COLD_TIME) * 100" | bc) + +TOTAL_WARM_TIME=$(echo "$WARM_TIMES" | awk '{sum+=$1} END {print sum}') +HYPOTHETICAL_NO_CACHE=$(echo "scale=2; $COLD_TIME * $WARM_COUNT" | bc) +TIME_SAVED=$(echo "scale=2; $HYPOTHETICAL_NO_CACHE - $TOTAL_WARM_TIME" | bc) +TIME_SAVED_MINUTES=$(echo "scale=1; $TIME_SAVED / 60" | bc) + +echo -e "${CYAN}${BOLD}Push Performance:${NC}\n" +printf " %-25s %10.2fs\n" "Cold cache (Push #1):" "$COLD_TIME" +printf " %-25s %10.2fs\n" "Warm cache (average):" "$WARM_AVG" +printf " %-25s %10.2fs\n" "Warm cache (min):" "$WARM_MIN" +printf " %-25s %10.2fs\n" "Warm cache (max):" "$WARM_MAX" +printf " %-25s %10.2fs\n" "Warm cache (std dev):" "$WARM_STDDEV" + +echo -e "\n${GREEN}${BOLD}Performance Improvement:${NC}\n" +printf " %-25s %10.1f%%\n" "Speed improvement:" "$IMPROVEMENT" +printf " %-25s %10.2fx\n" "Speedup ratio:" "$SPEEDUP" + +echo -e "\n${CYAN}${BOLD}Total Time Saved:${NC}\n" +printf " %-30s %10.2fs\n" "Total warm pushes time:" "$TOTAL_WARM_TIME" +printf " %-30s %10.2fs\n" "Hypothetical (no cache):" "$HYPOTHETICAL_NO_CACHE" +printf " %-30s %10.2fs (%.1fm)\n" "Time saved:" "$TIME_SAVED" "$TIME_SAVED_MINUTES" + +echo -e "\n${CYAN}${BOLD}Cache Statistics:${NC}\n" +CACHE_DIR="../.remote/cache" +if [ -d "$CACHE_DIR" ]; then + FINAL_CACHE_SIZE=$(du -sh "$CACHE_DIR" 2>/dev/null | cut -f1) + FINAL_CACHE_COUNT=$(ls -1 "$CACHE_DIR" 2>/dev/null | wc -l | tr -d ' ') + printf " %-25s %10s\n" "Cache size:" "$FINAL_CACHE_SIZE" + printf " %-25s %10s\n" "Cached repositories:" "$FINAL_CACHE_COUNT" +fi + +echo -e "\n${GREEN}${BOLD}✓ Benchmark complete!${NC}" diff --git a/src/config/generated/config.ts b/src/config/generated/config.ts index 4d3493e1a..d61df7dcc 100644 --- a/src/config/generated/config.ts +++ b/src/config/generated/config.ts @@ -36,6 +36,10 @@ export interface GitProxyConfig { * List of repositories that are authorised to be pushed to through the proxy. 
*/ authorisedList?: AuthorisedRepo[]; + /** + * Configuration for bare repository cache (hybrid cache system) + */ + cache?: Cache; /** * Block commits based on rules defined over author/committer e-mail addresses, commit * message content and diff content @@ -286,6 +290,24 @@ export interface AuthorisedRepo { [property: string]: any; } +/** + * Configuration for bare repository cache (hybrid cache system) + */ +export interface Cache { + /** + * Directory path for bare repository cache (default ./.remote/cache) + */ + cacheDir: string; + /** + * Maximum number of repositories in cache (default 50) + */ + maxRepositories: number; + /** + * Maximum cache size in gigabytes (default 2GB) + */ + maxSizeGB: number; +} + /** * Block commits based on rules defined over author/committer e-mail addresses, commit * message content and diff content @@ -690,6 +712,7 @@ const typeMap: any = { typ: u(undefined, a(r('AuthenticationElement'))), }, { json: 'authorisedList', js: 'authorisedList', typ: u(undefined, a(r('AuthorisedRepo'))) }, + { json: 'cache', js: 'cache', typ: u(undefined, r('Cache')) }, { json: 'commitConfig', js: 'commitConfig', typ: u(undefined, r('CommitConfig')) }, { json: 'configurationSources', js: 'configurationSources', typ: u(undefined, 'any') }, { json: 'contactEmail', js: 'contactEmail', typ: u(undefined, '') }, @@ -793,6 +816,14 @@ const typeMap: any = { ], 'any', ), + Cache: o( + [ + { json: 'cacheDir', js: 'cacheDir', typ: '' }, + { json: 'maxRepositories', js: 'maxRepositories', typ: 3.14 }, + { json: 'maxSizeGB', js: 'maxSizeGB', typ: 3.14 }, + ], + false, + ), CommitConfig: o( [ { json: 'author', js: 'author', typ: u(undefined, r('Author')) }, diff --git a/src/config/index.ts b/src/config/index.ts index 6c108d3fc..5534ac8ba 100644 --- a/src/config/index.ts +++ b/src/config/index.ts @@ -103,6 +103,9 @@ function mergeConfigurations( commitConfig: { ...defaultConfig.commitConfig, ...userSettings.commitConfig }, attestationConfig: { ...defaultConfig.attestationConfig, ...userSettings.attestationConfig }, rateLimit: userSettings.rateLimit || defaultConfig.rateLimit, + cache: userSettings.cache + ? { ...defaultConfig.cache, ...userSettings.cache } + : defaultConfig.cache, tls: tlsConfig, tempPassword: { ...defaultConfig.tempPassword, ...userSettings.tempPassword }, // Preserve legacy SSL fields @@ -196,6 +199,7 @@ export const logConfiguration = () => { console.log(`data sink = ${JSON.stringify(getDatabase())}`); console.log(`authentication = ${JSON.stringify(getAuthMethods())}`); console.log(`rateLimit = ${JSON.stringify(getRateLimit())}`); + console.log(`cache = ${JSON.stringify(getCacheConfig())}`); }; export const getAPIs = () => { @@ -285,6 +289,11 @@ export const getRateLimit = () => { return config.rateLimit; }; +export const getCacheConfig = () => { + const config = loadFullConfiguration(); + return config.cache; +}; + // Function to handle configuration updates const handleConfigUpdate = async (newConfig: Configuration) => { console.log('Configuration updated from external source'); diff --git a/src/proxy/processors/push-action/README.md b/src/proxy/processors/push-action/README.md new file mode 100644 index 000000000..6c3ccd07b --- /dev/null +++ b/src/proxy/processors/push-action/README.md @@ -0,0 +1,200 @@ +# Hybrid Cache Architecture + +## Overview + +The hybrid cache architecture optimizes Git repository cloning by splitting the cache into two layers: + +1. **Bare Cache** (persistent, shared) - Stores minimal Git data shared across all requests +2. 
**Working Copy** (temporary, isolated) - Per-request workspace for push validation + +## How pullRemote Works + +### Phase 1: Bare Cache (Persistent, Shared) + +```typescript +const bareRepo = path.join(BARE_CACHE, action.repoName); + +if (bareExists) { + // CACHE HIT: Fast fetch to update existing bare repo + await gitOps.fetch({ + dir: bareRepo, + url: action.url, + bare: true, + depth: 1, + }); + cacheManager.touchRepository(action.repoName); // Update LRU timestamp +} else { + // CACHE MISS: Clone new bare repository + await gitOps.clone({ + dir: bareRepo, + url: action.url, + bare: true, + depth: 1, + }); +} +``` + +**Key Points:** + +- Bare repositories contain only `.git` data (no working tree) +- Shared across all push requests for the same repository +- Uses LRU eviction based on `maxSizeGB` and `maxRepositories` limits +- `touchRepository()` updates access time for LRU tracking + +### Phase 2: Working Copy (Temporary, Isolated) + +```typescript +const workCopy = path.join(WORK_DIR, action.id); +const workCopyPath = path.join(workCopy, action.repoName); + +// Fast local clone from bare cache +await gitOps.cloneLocal({ + sourceDir: bareRepo, + targetDir: workCopyPath, + depth: 1, +}); + +action.proxyGitPath = workCopy; // Used by subsequent processors +``` + +**Key Points:** + +- Each push request gets an isolated working copy +- Cloned from local bare cache (fast, no network) +- Cleaned up after push validation completes + +### Phase 3: Cache Management + +```typescript +const evictionResult = await cacheManager.enforceLimits(); +``` + +**CacheManager** uses LRU (Least Recently Used) eviction: + +- Monitors total cache size and repository count +- Removes oldest repositories when limits are exceeded +- Thread-safe via mutex to prevent race conditions + +## Performance Benchmarks + +Real-world performance comparison using the Backstage repository (177MB cached bare repo with `depth: 1`). + +### Benchmark Setup + +- **Test Repository**: Backstage (medium-large repository, 177MB cached) +- **Test Method**: 10 consecutive push operations (1 cold + 9 warm) +- **Cache Configuration**: Bare repositories with `depth: 1` (shallow clone) +- **Benchmark Script**: [`cache-benchmark.sh`](../../../../scripts/cache-benchmark.sh) + +### Results Comparison + +| Metric | Without Cache (main) | With Cache (PR) | Improvement | +| ------------------- | -------------------- | --------------- | -------------------- | +| **Cold Push** | 20.63s | 17.58s | 1.2x faster | +| **Warm Push (avg)** | 19.88s | **6.68s** | **3x faster** | +| **Warm Push (min)** | 18.37s | 6.34s | 2.9x faster | +| **Warm Push (max)** | 21.22s | 7.12s | 3x faster | +| **Std Deviation** | 0.99s | 0.19s | 5x more consistent | +| **Speedup Ratio** | 1.03x | **2.63x** | **2.6x improvement** | + +### Time Saved + +**Without Cache (main branch)**: + +- 9 warm pushes: 178.93s total +- Every push requires full GitHub clone + +**With Cache (this PR)**: + +- 9 warm pushes: 60.16s total +- **Time saved: 98.10s (1.6 minutes)** +- **Efficiency gain: 66%** + +### Running the Benchmark + +To reproduce these results with your own repository fork: + +```bash +# Test with cache (this PR branch) +./scripts/cache-benchmark.sh owner/repo +``` + +**Example**: + +```bash +./scripts/cache-benchmark.sh yourFork/backstage main 10 +``` + +**Note**: Results may vary based on network conditions, GitHub server load, and repository size. The benchmark uses `depth: 1` for all git operations. You must have push access to the repository you're testing. 
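+
+The settings used by the proxy (and exercised by this benchmark) come from the `cache` block in `proxy.config.json`, described in the next section. As a rough sketch of the wiring — condensed from `src/config/index.ts` and the module-level code in `cache-manager.ts` shown later in this diff, not additional API surface:
+
+```typescript
+import { getCacheConfig } from '../../../config';
+import { CacheManager } from './cache-manager';
+
+// getCacheConfig() returns the user's `cache` block merged over the defaults,
+// so a partial config such as { "cache": { "maxSizeGB": 5 } } still yields
+// values for all three fields.
+const config = getCacheConfig();
+
+// In cache-manager.ts this instance is exported as a singleton and shared
+// by pullRemote and clearBareClone.
+const cacheManager = new CacheManager(
+  config?.cacheDir, // default './.remote/cache'
+  config?.maxSizeGB, // default 2
+  config?.maxRepositories, // default 50
+);
+```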
+## Cache Configuration
+
+In `proxy.config.json`:
+
+```json
+{
+  "cache": {
+    "maxSizeGB": 2, // Maximum total cache size
+    "maxRepositories": 50, // Maximum number of cached repos
+    "cacheDir": "./.remote/cache" // Bare cache location
+  }
+}
+```
+
+## Concurrency & Thread Safety
+
+The `CacheManager` uses a Promise-based mutex to serialize cache operations:
+
+```typescript
+private mutex: Promise<void> = Promise.resolve();
+
+async touchRepository(repoName: string): Promise<void> {
+  return this.acquireLock(() => {
+    // Atomic operation
+  });
+}
+
+async enforceLimits(): Promise<{ removedRepos: string[]; freedBytes: number }> {
+  return this.acquireLock(() => {
+    // Atomic operation
+  });
+}
+```
+
+**Race Conditions Prevented:**
+
+- Multiple `enforceLimits()` calls removing the same repository
+- `touchRepository()` updating while `enforceLimits()` is removing
+- `getCacheStats()` reading while repositories are being deleted
+
+## Cleanup Strategy
+
+**Bare Cache:**
+
+- Cleaned via LRU eviction (oldest repositories removed first)
+- Triggered after every push via `enforceLimits()`
+- Respects `maxSizeGB` and `maxRepositories` limits
+
+**Working Copies:**
+
+- Automatically cleaned by `clearBareClone.ts` after push completes
+- Each request's `action.id` directory is deleted
+- No manual cleanup needed
+
+## Monitoring & Debugging
+
+**Cache Statistics:**
+
+```typescript
+const stats = cacheManager.getCacheStats();
+console.log(`Total repos: ${stats.totalRepositories}`);
+console.log(`Total size: ${stats.totalSizeBytes / (1024 * 1024)}MB`);
+```
+
+**LRU Eviction Logs:**
+
+```typescript
+const result = await cacheManager.enforceLimits();
+console.log(`Evicted ${result.removedRepos.length} repositories`);
+console.log(`Freed ${result.freedBytes / (1024 * 1024)}MB`);
+```
diff --git a/src/proxy/processors/push-action/cache-manager.ts b/src/proxy/processors/push-action/cache-manager.ts
new file mode 100644
index 000000000..fc711f2ea
--- /dev/null
+++ b/src/proxy/processors/push-action/cache-manager.ts
@@ -0,0 +1,203 @@
+import fs from 'fs';
+import path from 'path';
+import { getCacheConfig } from '../../../config';
+
+export interface CacheStats {
+  totalRepositories: number;
+  totalSizeBytes: number;
+  repositories: Array<{
+    name: string;
+    sizeBytes: number;
+    lastAccessed: Date;
+  }>;
+}
+
+export class CacheManager {
+  private repoCacheDir: string;
+  private maxSizeGB: number;
+  private maxRepositories: number;
+  private mutex: Promise<void> = Promise.resolve();
+
+  constructor(
+    repoCacheDir: string = './.remote/cache',
+    maxSizeGB: number = 2,
+    maxRepositories: number = 50,
+  ) {
+    this.repoCacheDir = repoCacheDir;
+    this.maxSizeGB = maxSizeGB;
+    this.maxRepositories = maxRepositories;
+  }
+
+  /**
+   * Acquire mutex lock for cache operations
+   */
+  private async acquireLock<T>(operation: () => T | Promise<T>): Promise<T> {
+    const previousLock = this.mutex;
+    let releaseLock: () => void;
+
+    this.mutex = new Promise<void>((resolve) => {
+      releaseLock = resolve;
+    });
+
+    try {
+      await previousLock;
+      return await operation();
+    } finally {
+      releaseLock!();
+    }
+  }
+
+  /**
+   * Update access time for repository (for LRU purposes)
+   */
+  async touchRepository(repoName: string): Promise<void> {
+    return this.acquireLock(() => {
+      const repoPath = path.join(this.repoCacheDir, repoName);
+      if (fs.existsSync(repoPath)) {
+        const now = new Date();
+        fs.utimesSync(repoPath, now, now);
+      }
+    });
+  }
+
+  /**
+   * Get cache statistics
+   */
+  getCacheStats(): CacheStats {
+    if (!fs.existsSync(this.repoCacheDir)) {
+      return 
{ + totalRepositories: 0, + totalSizeBytes: 0, + repositories: [], + }; + } + + const repositories: Array<{ name: string; sizeBytes: number; lastAccessed: Date }> = []; + let totalSizeBytes = 0; + + const entries = fs.readdirSync(this.repoCacheDir, { withFileTypes: true }); + + for (const entry of entries) { + if (entry.isDirectory()) { + const repoPath = path.join(this.repoCacheDir, entry.name); + const sizeBytes = this.getDirectorySize(repoPath); + const stats = fs.statSync(repoPath); + + repositories.push({ + name: entry.name, + sizeBytes, + lastAccessed: stats.atime, + }); + + totalSizeBytes += sizeBytes; + } + } + + return { + totalRepositories: repositories.length, + totalSizeBytes, + repositories, + }; + } + + /** + * Enforce cache limits using LRU eviction + */ + async enforceLimits(): Promise<{ removedRepos: string[]; freedBytes: number }> { + return this.acquireLock(() => { + const stats = this.getCacheStats(); + const removedRepos: string[] = []; + let freedBytes = 0; + + // Sort repositories by last accessed (oldest first for removal) + const reposToEvaluate = stats.repositories.toSorted( + (a, b) => a.lastAccessed.getTime() - b.lastAccessed.getTime(), + ); + + // Check size limit - convert GB to bytes once + let currentSizeBytes = stats.totalSizeBytes; + const maxSizeBytes = this.maxSizeGB * 1024 * 1024 * 1024; + + for (const repo of reposToEvaluate) { + const shouldRemove = + currentSizeBytes > maxSizeBytes || // Over size limit + stats.totalRepositories - removedRepos.length > this.maxRepositories; // Over count limit + + if (shouldRemove) { + this.removeRepository(repo.name); + removedRepos.push(repo.name); + freedBytes += repo.sizeBytes; + currentSizeBytes -= repo.sizeBytes; + } else { + break; // We've cleaned enough + } + } + + return { removedRepos, freedBytes }; + }); + } + + /** + * Remove specific repository from cache + */ + private removeRepository(repoName: string): void { + const repoPath = path.join(this.repoCacheDir, repoName); + if (fs.existsSync(repoPath)) { + fs.rmSync(repoPath, { recursive: true, force: true }); + } + } + + /** + * Calculate directory size in bytes + */ + private getDirectorySize(dirPath: string): number { + let totalBytes = 0; + + const calculateSize = (currentPath: string) => { + const items = fs.readdirSync(currentPath, { withFileTypes: true }); + + for (const item of items) { + const itemPath = path.join(currentPath, item.name); + + if (item.isDirectory()) { + calculateSize(itemPath); + } else { + try { + const stats = fs.statSync(itemPath); + totalBytes += stats.size; + } catch (error) { + console.warn(`[CacheManager] Failed to stat file ${itemPath}:`, error); + } + } + } + }; + + try { + calculateSize(dirPath); + } catch (error) { + console.warn(`[CacheManager] Failed to calculate size for ${dirPath}:`, error); + return 0; + } + + return totalBytes; + } + + /** + * Get cache configuration + */ + getConfig() { + return { + maxSizeGB: this.maxSizeGB, + maxRepositories: this.maxRepositories, + repoCacheDir: this.repoCacheDir, + }; + } +} + +// Global instance initialized with config +const config = getCacheConfig(); +export const cacheManager = new CacheManager( + config?.cacheDir, + config?.maxSizeGB, + config?.maxRepositories, +); diff --git a/src/proxy/processors/push-action/clearBareClone.ts b/src/proxy/processors/push-action/clearBareClone.ts index 91f7f5b22..6c2b582c9 100644 --- a/src/proxy/processors/push-action/clearBareClone.ts +++ b/src/proxy/processors/push-action/clearBareClone.ts @@ -1,16 +1,31 @@ import { Action, Step } 
from '../../actions';
 import fs from 'node:fs';
+import path from 'node:path';
+import { cacheManager } from './cache-manager';
 
-const exec = async (req: any, action: Action): Promise<Action> => {
+const exec = async (_req: any, action: Action): Promise<Action> => {
   const step = new Step('clearBareClone');
 
-  // Recursively remove the contents of ./.remote and ignore exceptions
-  fs.rm('./.remote', { recursive: true, force: true }, (err) => {
-    if (err) {
-      throw err;
+  // Get work directory from configuration
+  const config = cacheManager.getConfig();
+  const WORK_DIR = path.join(path.dirname(config.repoCacheDir), 'work');
+
+  // Delete ONLY this push's working copy
+  const workCopy = path.join(WORK_DIR, action.id);
+
+  if (fs.existsSync(workCopy)) {
+    try {
+      fs.rmSync(workCopy, { recursive: true, force: true });
+      step.log(`Cleaned working copy for push ${action.id}`);
+    } catch (err) {
+      step.log(`Warning: Could not clean working copy ${workCopy}: ${err}`);
     }
-    console.log(`.remote is deleted!`);
-  });
+  } else {
+    step.log(`Working copy ${workCopy} not found (may have already been cleaned)`);
+  }
+
+  // Note: Cache limit enforcement is handled by pullRemote after cloning
+  step.log('Working copy cleanup complete');
 
   action.addStep(step);
   return action;
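One detail worth calling out at this point in the diff: the working-copy root is never configured directly. Both `clearBareClone.ts` above and `pullRemote.ts` later in this diff derive it as a `work` directory that sits alongside the configured bare-cache directory. A minimal sketch of that derivation, with paths assuming the default `cacheDir`:

```typescript
import path from 'path';

// Default from proxy.config.json: "cacheDir": "./.remote/cache"
const cacheDir = './.remote/cache';

// Per-push working copies live in the sibling './.remote/work', so a push
// with id '123__456' (the action.id used in the tests) is checked out
// under './.remote/work/123__456'.
const workDir = path.join(path.dirname(cacheDir), 'work');
const workCopy = path.join(workDir, '123__456');
```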
diff --git a/src/proxy/processors/push-action/git-operations.ts b/src/proxy/processors/push-action/git-operations.ts
new file mode 100644
index 000000000..59335e7e8
--- /dev/null
+++ b/src/proxy/processors/push-action/git-operations.ts
@@ -0,0 +1,173 @@
+import { spawnSync } from 'child_process';
+import fs from 'fs';
+
+/**
+ * Git operations using native git commands
+ */
+
+/**
+ * Build URL with credentials if provided
+ */
+function buildAuthUrl(url: string, username?: string, password?: string): string {
+  if (username && password) {
+    return url.replace(
+      /^(https?:\/\/)/,
+      `$1${encodeURIComponent(username)}:${encodeURIComponent(password)}@`,
+    );
+  }
+  return url;
+}
+
+interface CloneOptions {
+  dir: string;
+  url: string;
+  username?: string;
+  password?: string;
+  bare?: boolean;
+  depth?: number;
+  singleBranch?: boolean;
+}
+
+interface FetchOptions {
+  dir: string;
+  url: string;
+  username?: string;
+  password?: string;
+  depth?: number;
+  prune?: boolean;
+  bare?: boolean;
+}
+
+/**
+ * Clone a repository using native git
+ */
+export async function clone(options: CloneOptions): Promise<void> {
+  const { dir, url, username, password, bare = false, depth, singleBranch = false } = options;
+
+  const authUrl = buildAuthUrl(url, username, password);
+
+  const args: string[] = ['clone'];
+
+  if (bare) {
+    args.push('--bare');
+  }
+
+  if (depth) {
+    args.push('--depth', depth.toString());
+  }
+
+  if (singleBranch) {
+    args.push('--single-branch');
+  } else {
+    // Explicitly clone all branches (needed when using --depth)
+    args.push('--no-single-branch');
+  }
+
+  args.push(authUrl, dir);
+
+  const result = spawnSync('git', args, { stdio: 'pipe' });
+  if (result.status !== 0) {
+    throw new Error(`Git clone failed: ${result.stderr?.toString() || 'Unknown error'}`);
+  }
+
+  // Sanitize credentials from git config
+  if (username && password) {
+    sanitizeCredentials(dir, url, bare);
+  }
+}
+
+/**
+ * Fetch updates in a repository using native git
+ */
+export async function fetch(options: FetchOptions): Promise<void> {
+  const { dir, url, username, password, depth, prune = false, bare = false } = options;
+
+  const authUrl = buildAuthUrl(url, username, password);
+
+  const args: string[] = ['-C', dir, 'fetch'];
+
+  if (depth) {
+    args.push('--depth', depth.toString());
+  }
+
+  if (prune) {
+    args.push('--prune');
+  }
+
+  args.push(authUrl);
+  args.push('+refs/heads/*:refs/heads/*'); // Fetch all branches
+
+  const result = spawnSync('git', args, { stdio: 'pipe' });
+  if (result.status !== 0) {
+    throw new Error(`Git fetch failed: ${result.stderr?.toString() || 'Unknown error'}`);
+  }
+
+  // Sanitize credentials from git config
+  if (username && password) {
+    sanitizeCredentials(dir, url, bare);
+  }
+}
+
+/**
+ * Remove credentials from git config and set clean URL
+ */
+function sanitizeCredentials(dir: string, cleanUrl: string, isBare: boolean): void {
+  try {
+    // For bare repositories, git clone --bare doesn't set up a remote by default
+    // We need to add it first if it doesn't exist
+    if (isBare) {
+      let result = spawnSync('git', ['-C', dir, 'remote', 'add', 'origin', cleanUrl], {
+        stdio: 'pipe',
+      });
+      if (result.status !== 0) {
+        // If remote already exists, update it
+        result = spawnSync('git', ['-C', dir, 'remote', 'set-url', 'origin', cleanUrl], {
+          stdio: 'pipe',
+        });
+        if (result.status !== 0) {
+          throw new Error(`Failed to set remote: ${result.stderr?.toString()}`);
+        }
+      }
+    } else {
+      // For non-bare repositories, remote origin should exist
+      // Unset the URL with credentials (ignore error if already unset)
+      spawnSync('git', ['-C', dir, 'config', '--unset', 'remote.origin.url'], {
+        stdio: 'pipe',
+      });
+
+      // Set clean URL without credentials
+      const result = spawnSync('git', ['-C', dir, 'remote', 'set-url', 'origin', cleanUrl], {
+        stdio: 'pipe',
+      });
+      if (result.status !== 0) {
+        throw new Error(`Failed to set remote: ${result.stderr?.toString()}`);
+      }
+    }
+  } catch (e) {
+    console.warn(`Warning: Failed to sanitize credentials for ${dir}:`, e);
+  }
+}
+
+/**
+ * Clone from local repository (for working copy from bare cache)
+ */
+export async function cloneLocal(options: {
+  sourceDir: string;
+  targetDir: string;
+  depth?: number;
+}): Promise<void> {
+  const { sourceDir, targetDir, depth } = options;
+
+  const args: string[] = ['clone'];
+
+  if (depth) {
+    args.push('--depth', depth.toString());
+  }
+
+  args.push(sourceDir, targetDir);
+
+  const result = spawnSync('git', args, { stdio: 'pipe' });
+  if (result.status !== 0) {
+    throw new Error(`Git local clone failed: ${result.stderr?.toString() || 'Unknown error'}`);
+  }
+}
diff --git a/src/proxy/processors/push-action/metrics.ts b/src/proxy/processors/push-action/metrics.ts
new file mode 100644
index 000000000..d07c4f853
--- /dev/null
+++ b/src/proxy/processors/push-action/metrics.ts
@@ -0,0 +1,38 @@
+import { Step } from '../../actions';
+import { performance } from 'perf_hooks';
+
+/**
+ * Performance Timer
+ *
+ * Logs basic timing info for operations
+ */
+export class PerformanceTimer {
+  private step: Step;
+  private startTime: number = 0;
+  private operation: string = '';
+
+  constructor(step: Step) {
+    this.step = step;
+  }
+
+  start(operation: string): void {
+    this.operation = operation;
+    this.startTime = performance.now();
+    this.step.log(`${operation} started`);
+  }
+
+  mark(message: string): void {
+    if (this.startTime > 0) {
+      const elapsed = performance.now() - this.startTime;
+      this.step.log(`${message}: ${elapsed.toFixed(2)}ms`);
+    }
+  }
+
+  end(): void {
+    if (this.startTime > 0) {
+      const totalTime = performance.now() - this.startTime;
+      this.step.log(`${this.operation} completed: ${totalTime.toFixed(2)}ms`);
+      this.startTime = 0;
+    }
+  }
+}
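`PerformanceTimer` is consumed by `pullRemote.ts` directly below; as quick orientation, the intended call pattern looks roughly like this (the operation and mark names here are illustrative):

```typescript
import { Step } from '../../actions';
import { PerformanceTimer } from './metrics';

const step = new Step('pullRemote');
const timer = new PerformanceTimer(step);

timer.start('CLONE finos/git-proxy.git'); // logs "CLONE finos/git-proxy.git started"
// ... clone the bare repository ...
timer.mark('Bare clone complete'); // logs elapsed ms since start()
// ... create the isolated working copy ...
timer.end(); // logs total duration and resets the timer
```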
diff --git a/src/proxy/processors/push-action/pullRemote.ts b/src/proxy/processors/push-action/pullRemote.ts
index 73b8981ec..305b6e890 100644
--- a/src/proxy/processors/push-action/pullRemote.ts
+++ b/src/proxy/processors/push-action/pullRemote.ts
@@ -1,45 +1,139 @@
 import { Action, Step } from '../../actions';
 import fs from 'fs';
-import git from 'isomorphic-git';
-import gitHttpClient from 'isomorphic-git/http/node';
-
-const dir = './.remote';
+import path from 'path';
+import { PerformanceTimer } from './metrics';
+import { cacheManager } from './cache-manager';
+import * as gitOps from './git-operations';
 
 const exec = async (req: any, action: Action): Promise<Action> => {
   const step = new Step('pullRemote');
+  const timer = new PerformanceTimer(step);
 
   try {
-    action.proxyGitPath = `${dir}/${action.id}`;
+    // Get cache directories from configuration
+    const config = cacheManager.getConfig();
+    const BARE_CACHE = config.repoCacheDir;
+    const WORK_DIR = path.join(path.dirname(BARE_CACHE), 'work');
 
-    if (!fs.existsSync(dir)) {
-      fs.mkdirSync(dir);
-    }
+    // Paths for hybrid architecture
+    const bareRepo = path.join(BARE_CACHE, action.repoName);
+    const workCopy = path.join(WORK_DIR, action.id);
 
-    if (!fs.existsSync(action.proxyGitPath)) {
-      step.log(`Creating folder ${action.proxyGitPath}`);
-      fs.mkdirSync(action.proxyGitPath, 0o755);
-    }
+    // Check if bare cache exists
+    const bareExists = fs.existsSync(bareRepo);
 
-    const cmd = `git clone ${action.url}`;
-    step.log(`Executing ${cmd}`);
+    step.log(`Bare cache: ${bareExists ? 'EXISTS' : 'MISSING'}`);
+    step.log(`Strategy: ${bareExists ? 'FETCH + LOCAL_CLONE' : 'BARE_CLONE + LOCAL_CLONE'}`);
+
+    // Start timing
+    const strategy = bareExists ? 'CACHED' : 'CLONE';
+    timer.start(`${strategy} ${action.repoName}`);
+
+    if (!fs.existsSync(BARE_CACHE)) {
+      fs.mkdirSync(BARE_CACHE, { recursive: true });
+    }
+    if (!fs.existsSync(WORK_DIR)) {
+      fs.mkdirSync(WORK_DIR, { recursive: true });
+    }
+    timer.mark('Setup complete');
 
     const authHeader = req.headers?.authorization;
     const [username, password] = Buffer.from(authHeader.split(' ')[1], 'base64')
       .toString()
       .split(':');
 
-    await git.clone({
-      fs,
-      http: gitHttpClient,
-      url: action.url,
-      dir: `${action.proxyGitPath}/${action.repoName}`,
-      onAuth: () => ({ username, password }),
-      singleBranch: true,
+    // PHASE 1: Bare Cache (persistent, shared)
+    if (bareExists) {
+      // CACHE HIT: Fetch updates in bare repository
+      step.log(`Fetching updates in bare cache...`);
+
+      try {
+        await gitOps.fetch({
+          dir: bareRepo,
+          url: action.url,
+          username,
+          password,
+          depth: 1,
+          prune: true,
+          bare: true,
+        });
+
+        // Update access time for LRU
+        await cacheManager.touchRepository(action.repoName);
+        timer.mark('Fetch complete');
+        step.log(`Bare repository updated`);
+      } catch (fetchError) {
+        step.log(`Fetch failed, rebuilding bare cache: ${fetchError}`);
+        // Remove broken cache and re-clone
+        if (fs.existsSync(bareRepo)) {
+          fs.rmSync(bareRepo, { recursive: true, force: true });
+        }
+
+        // Re-clone as fallback
+        await gitOps.clone({
+          dir: bareRepo,
+          url: action.url,
+          username,
+          password,
+          bare: true,
+          depth: 1,
+        });
+
+        timer.mark('Bare clone complete (fallback)');
+      }
+    } else {
+      // CACHE MISS: Clone bare repository
+      step.log(`Cloning bare repository to cache...`);
+
+      await gitOps.clone({
+        dir: bareRepo,
+        url: action.url,
+        username,
+        password,
+        bare: true,
+        depth: 1,
+      });
+
+      timer.mark('Bare clone complete');
+      step.log(`Bare repository created at ${bareRepo}`);
+
+      // Update access time for LRU after successful clone
+      await 
cacheManager.touchRepository(action.repoName); + } + + // PHASE 2: Working Copy (temporary, isolated) + step.log(`Creating isolated working copy for push ${action.id}...`); + + const workCopyPath = path.join(workCopy, action.repoName); + + // Clone from local bare cache (fast local operation) + await gitOps.cloneLocal({ + sourceDir: bareRepo, + targetDir: workCopyPath, depth: 1, }); - step.log(`Completed ${cmd}`); - step.setContent(`Completed ${cmd}`); + timer.mark('Working copy ready'); + step.log(`Working copy created at ${workCopyPath}`); + + // Set action path to working copy + action.proxyGitPath = workCopy; + + const completedMsg = bareExists + ? `Completed fetch + local clone (hybrid cache)` + : `Completed bare clone + local clone (hybrid cache)`; + + step.log(completedMsg); + step.setContent(completedMsg); + + timer.end(); + + // Enforce cache limits (LRU eviction on bare cache) + const evictionResult = await cacheManager.enforceLimits(); + if (evictionResult.removedRepos.length > 0) { + const freedMB = (evictionResult.freedBytes / (1024 * 1024)).toFixed(2); + step.log(`LRU evicted ${evictionResult.removedRepos.length} bare repos, freed ${freedMB}MB`); + } } catch (e: any) { step.setError(e.toString('utf-8')); throw e; diff --git a/test/ConfigLoader.test.js b/test/ConfigLoader.test.js index 76c659855..7eb06e74d 100644 --- a/test/ConfigLoader.test.js +++ b/test/ConfigLoader.test.js @@ -480,6 +480,8 @@ describe('ConfigLoader', () => { }); it('should throw error if config path was not found', async function () { + this.timeout(10000); + const source = { type: 'git', repository: 'https://github.com/finos/git-proxy.git', @@ -497,6 +499,8 @@ describe('ConfigLoader', () => { }); it('should throw error if config file is not valid JSON', async function () { + this.timeout(10000); + const source = { type: 'git', repository: 'https://github.com/finos/git-proxy.git', diff --git a/test/processors/cacheManager.test.js b/test/processors/cacheManager.test.js new file mode 100644 index 000000000..7cb84b9f0 --- /dev/null +++ b/test/processors/cacheManager.test.js @@ -0,0 +1,165 @@ +const { expect } = require('chai'); +const fs = require('fs'); +const path = require('path'); +const { CacheManager } = require('../../src/proxy/processors/push-action/cache-manager'); + +describe('CacheManager', () => { + let testCacheDir; + let cacheManager; + + beforeEach(() => { + // Create temporary test cache directory + testCacheDir = path.join('./.remote', 'test-cache-' + Date.now()); + if (!fs.existsSync(testCacheDir)) { + fs.mkdirSync(testCacheDir, { recursive: true }); + } + cacheManager = new CacheManager(testCacheDir, 0.001, 3); // 1MB, 3 repos max + }); + + afterEach(() => { + // Clean up test cache directory + if (fs.existsSync(testCacheDir)) { + fs.rmSync(testCacheDir, { recursive: true, force: true }); + } + }); + + describe('getCacheStats', () => { + it('should return empty stats for empty cache', () => { + const stats = cacheManager.getCacheStats(); + expect(stats.totalRepositories).to.equal(0); + expect(stats.totalSizeBytes).to.equal(0); + expect(stats.repositories).to.be.an('array').that.is.empty; + }); + + it('should calculate stats for repositories in cache', () => { + const repo1 = path.join(testCacheDir, 'repo1.git'); + const repo2 = path.join(testCacheDir, 'repo2.git'); + + fs.mkdirSync(repo1); + fs.mkdirSync(repo2); + + fs.writeFileSync(path.join(repo1, 'file1.txt'), 'a'.repeat(1024 * 1024)); // 1MB + fs.writeFileSync(path.join(repo2, 'file2.txt'), 'b'.repeat(1024 * 1024)); // 1MB + + const 
stats = cacheManager.getCacheStats(); + expect(stats.totalRepositories).to.equal(2); + expect(stats.totalSizeBytes).to.be.at.least(2 * 1024 * 1024); // At least 2MB total in bytes + expect(stats.repositories).to.have.lengthOf(2); + expect(stats.repositories[0]).to.have.property('name'); + expect(stats.repositories[0]).to.have.property('sizeBytes'); + expect(stats.repositories[0]).to.have.property('lastAccessed'); + }); + + it('should have timestamps for repositories', () => { + const repo1 = path.join(testCacheDir, 'repo1.git'); + const repo2 = path.join(testCacheDir, 'repo2.git'); + + fs.mkdirSync(repo1); + fs.writeFileSync(path.join(repo1, 'file1.txt'), 'test'); + + fs.mkdirSync(repo2); + fs.writeFileSync(path.join(repo2, 'file2.txt'), 'test'); + + const stats = cacheManager.getCacheStats(); + expect(stats.repositories).to.have.lengthOf(2); + // Each should have a valid timestamp + stats.repositories.forEach((repo) => { + expect(repo.lastAccessed).to.be.instanceOf(Date); + expect(repo.lastAccessed.getTime()).to.be.greaterThan(0); + }); + }); + }); + + describe('touchRepository', () => { + it('should update repository access time', async () => { + const repoName = 'test-repo.git'; + const repoPath = path.join(testCacheDir, repoName); + + fs.mkdirSync(repoPath); + fs.writeFileSync(path.join(repoPath, 'file.txt'), 'test'); + + const statsBefore = cacheManager.getCacheStats(); + const timeBefore = statsBefore.repositories[0].lastAccessed.getTime(); + + await new Promise((resolve) => setTimeout(resolve, 100)); + + await cacheManager.touchRepository(repoName); + + const statsAfter = cacheManager.getCacheStats(); + const timeAfter = statsAfter.repositories[0].lastAccessed.getTime(); + + expect(timeAfter).to.be.greaterThan(timeBefore); + }); + + it('should not throw error for non-existent repository', async () => { + // Should not throw + await cacheManager.touchRepository('non-existent.git'); + }); + }); + + describe('enforceLimits', () => { + it('should remove oldest repositories when exceeding count limit', async () => { + // Create 4 repos (exceeds limit of 3) + for (let i = 1; i <= 4; i++) { + const repoPath = path.join(testCacheDir, `repo${i}.git`); + fs.mkdirSync(repoPath); + fs.writeFileSync(path.join(repoPath, 'file.txt'), 'a'.repeat(100 * 1024)); // 100KB + } + + const statsBefore = cacheManager.getCacheStats(); + expect(statsBefore.totalRepositories).to.equal(4); + + const result = await cacheManager.enforceLimits(); + + expect(result.removedRepos).to.have.lengthOf.at.least(1); + expect(result.freedBytes).to.be.at.least(0); + + const statsAfter = cacheManager.getCacheStats(); + expect(statsAfter.totalRepositories).to.be.at.most(3); + }); + + it('should remove repositories when exceeding size limit', async () => { + // Create repo that exceeds size limit (1MB) + const repo1 = path.join(testCacheDir, 'repo1.git'); + fs.mkdirSync(repo1); + fs.writeFileSync(path.join(repo1, 'largefile.txt'), 'a'.repeat(2 * 1024 * 1024)); // 2MB + + const statsBefore = cacheManager.getCacheStats(); + expect(statsBefore.totalSizeBytes).to.be.greaterThan(1024 * 1024); // Greater than 1MB in bytes + + const result = await cacheManager.enforceLimits(); + + expect(result.removedRepos).to.have.lengthOf(1); + expect(result.freedBytes).to.be.greaterThan(1024 * 1024); // Greater than 1MB in bytes + + const statsAfter = cacheManager.getCacheStats(); + expect(statsAfter.totalRepositories).to.equal(0); + }); + + it('should not remove anything if limits not exceeded', async () => { + // Create 2 repos (under limit of 
3) + for (let i = 1; i <= 2; i++) { + const repoPath = path.join(testCacheDir, `repo${i}.git`); + fs.mkdirSync(repoPath); + fs.writeFileSync(path.join(repoPath, 'file.txt'), 'test'); + } + + const result = await cacheManager.enforceLimits(); + + expect(result.removedRepos).to.be.empty; + expect(result.freedBytes).to.equal(0); + }); + }); + + describe('getConfig', () => { + it('should return cache configuration', () => { + const config = cacheManager.getConfig(); + + expect(config).to.deep.equal({ + maxSizeGB: 0.001, + maxRepositories: 3, + repoCacheDir: testCacheDir, + }); + }); + }); +}); diff --git a/test/processors/clearBareClone.test.js b/test/processors/clearBareClone.test.js index c58460913..4aaa01bc1 100644 --- a/test/processors/clearBareClone.test.js +++ b/test/processors/clearBareClone.test.js @@ -11,7 +11,7 @@ const actionId = '123__456'; const timestamp = Date.now(); describe('clear bare and local clones', async () => { - it('pull remote generates a local .remote folder', async () => { + it('pull remote generates a local .remote folder with hybrid cache structure', async () => { const action = new Action(actionId, 'type', 'get', timestamp, 'finos/git-proxy.git'); action.url = 'https://github.com/finos/git-proxy.git'; @@ -26,14 +26,17 @@ describe('clear bare and local clones', async () => { action, ); - expect(fs.existsSync(`./.remote/${actionId}`)).to.be.true; + // Hybrid cache creates: .remote/cache (bare repos) and .remote/work (working copies) + expect(fs.existsSync(`./.remote/work/${actionId}`)).to.be.true; + expect(fs.existsSync(`./.remote/cache/git-proxy.git`)).to.be.true; }).timeout(20000); - it('clear bare clone function purges .remote folder and specific clone folder', async () => { + it('clear bare clone function removes working copy and enforces cache limits', async () => { const action = new Action(actionId, 'type', 'get', timestamp, 'finos/git-proxy.git'); await clearBareClone(null, action); - expect(fs.existsSync(`./.remote`)).to.throw; - expect(fs.existsSync(`./.remote/${actionId}`)).to.throw; + // clearBareClone removes only the working copy for this push + expect(fs.existsSync(`./.remote/work/${actionId}`)).to.be.false; + expect(action.steps.some((s) => s.stepName === 'clearBareClone')).to.be.true; }); afterEach(() => { diff --git a/test/processors/hybridCache.integration.test.js b/test/processors/hybridCache.integration.test.js new file mode 100644 index 000000000..cc6bfc412 --- /dev/null +++ b/test/processors/hybridCache.integration.test.js @@ -0,0 +1,201 @@ +const fs = require('fs'); +const chai = require('chai'); +const pullRemote = require('../../src/proxy/processors/push-action/pullRemote').exec; +const clearBareClone = require('../../src/proxy/processors/push-action/clearBareClone').exec; +const { Action } = require('../../src/proxy/actions/Action'); +const { cacheManager } = require('../../src/proxy/processors/push-action/cache-manager'); + +chai.should(); +const expect = chai.expect; + +describe('Hybrid Cache Integration Tests', () => { + const testRepoUrl = 'https://github.com/finos/git-proxy.git'; + const testRepoName = 'finos/git-proxy.git'; + const authorization = `Basic ${Buffer.from('test:test').toString('base64')}`; + + // Shared test data populated by before() hook + let testData = { + cacheMissAction: null, + cacheHitAction: null, + cacheMissDuration: 0, + cacheHitDuration: 0, + bareRepoPath: './.remote/cache/git-proxy.git', + inodeBefore: null, + inodeAfter: null, + }; + + before(async function () { + this.timeout(30000); + + console.log('\n 
=== Setting up test data (one-time setup) ==='); + + // Clean up before starting + if (fs.existsSync('./.remote')) { + fs.rmSync('./.remote', { recursive: true, force: true }); + } + + const cacheMissActionId = 'cache-miss-' + Date.now(); + const cacheHitActionId = 'cache-hit-' + Date.now(); + + // First clone - cache MISS + console.log('Executing cache MISS...'); + const cacheMissAction = new Action(cacheMissActionId, 'push', 'POST', Date.now(), testRepoName); + cacheMissAction.url = testRepoUrl; + + const cacheMissStart = Date.now(); + await pullRemote({ headers: { authorization } }, cacheMissAction); + testData.cacheMissDuration = Date.now() - cacheMissStart; + testData.cacheMissAction = cacheMissAction; + + console.log(`Cache MISS completed in ${testData.cacheMissDuration}ms`); + + // Get inode before second clone + const bareRepoStatsBefore = fs.statSync(testData.bareRepoPath); + testData.inodeBefore = bareRepoStatsBefore.ino; + + // Wait a bit to ensure different timestamps + await new Promise((resolve) => setTimeout(resolve, 1000)); + + // Second clone - cache HIT + console.log('Executing cache HIT...'); + const cacheHitAction = new Action(cacheHitActionId, 'push', 'POST', Date.now(), testRepoName); + cacheHitAction.url = testRepoUrl; + + const cacheHitStart = Date.now(); + await pullRemote({ headers: { authorization } }, cacheHitAction); + testData.cacheHitDuration = Date.now() - cacheHitStart; + testData.cacheHitAction = cacheHitAction; + + console.log(`Cache HIT completed in ${testData.cacheHitDuration}ms`); + + // Get inode after second clone + const bareRepoStatsAfter = fs.statSync(testData.bareRepoPath); + testData.inodeAfter = bareRepoStatsAfter.ino; + }); + + after(() => { + // Clean up all .remote directories after all tests + if (fs.existsSync('./.remote')) { + fs.rmSync('./.remote', { recursive: true, force: true }); + } + }); + + describe('Cache MISS (first clone)', () => { + it('should create bare cache repository', () => { + // Verify bare cache was created + expect(fs.existsSync(testData.bareRepoPath)).to.be.true; + + // Verify it's a bare repository (has config, refs, objects) + expect(fs.existsSync(`${testData.bareRepoPath}/config`)).to.be.true; + expect(fs.existsSync(`${testData.bareRepoPath}/refs`)).to.be.true; + expect(fs.existsSync(`${testData.bareRepoPath}/objects`)).to.be.true; + }); + + it('should create working copy with actual files', () => { + const actionId = testData.cacheMissAction.id; + + // Verify working copy was created + expect(fs.existsSync(`./.remote/work/${actionId}`)).to.be.true; + + // Check the content inside working copy directory + const workCopyContents = fs.readdirSync(`./.remote/work/${actionId}`); + expect(workCopyContents.length).to.be.greaterThan(0); + + // Verify we have a git repository directory inside + const repoDir = workCopyContents.find((item) => item.includes('git-proxy')); + expect(repoDir).to.exist; + + // Verify it has .git folder (not bare) + expect(fs.existsSync(`./.remote/work/${actionId}/${repoDir}/.git`)).to.be.true; + + // Verify working copy has actual files + expect(fs.existsSync(`./.remote/work/${actionId}/${repoDir}/package.json`)).to.be.true; + }); + }); + + describe('Cache HIT (second clone)', () => { + it('should reuse existing bare cache (not recreate)', () => { + // Verify bare cache still exists + expect(fs.existsSync(testData.bareRepoPath)).to.be.true; + + // Same inode means same directory (not recreated) + expect(testData.inodeAfter).to.equal(testData.inodeBefore); + }); + + it('should create new 
isolated working copy', () => { + const cacheMissActionId = testData.cacheMissAction.id; + const cacheHitActionId = testData.cacheHitAction.id; + + // Verify new working copy was created + expect(fs.existsSync(`./.remote/work/${cacheHitActionId}`)).to.be.true; + + // Verify both working copies exist (isolated) + expect(fs.existsSync(`./.remote/work/${cacheMissActionId}`)).to.be.true; + expect(fs.existsSync(`./.remote/work/${cacheHitActionId}`)).to.be.true; + + // Verify they are different directories + expect(cacheMissActionId).to.not.equal(cacheHitActionId); + }); + + it('should be faster than cache MISS', () => { + console.log(` Cache MISS: ${testData.cacheMissDuration}ms`); + console.log(` Cache HIT: ${testData.cacheHitDuration}ms`); + console.log( + ` Performance improvement: ${Math.round((1 - testData.cacheHitDuration / testData.cacheMissDuration) * 100)}%`, + ); + + expect(testData.cacheHitDuration).to.be.lessThan(testData.cacheMissDuration); + }); + }); + + describe('Hybrid cache structure', () => { + it('should maintain separate bare cache and working directories', () => { + // Verify directory structure + expect(fs.existsSync('./.remote/cache')).to.be.true; + expect(fs.existsSync('./.remote/work')).to.be.true; + + // Verify bare cache contains .git repositories + const cacheContents = fs.readdirSync('./.remote/cache'); + expect(cacheContents.some((name) => name.endsWith('.git'))).to.be.true; + + // Verify work directory contains action-specific folders + const workContents = fs.readdirSync('./.remote/work'); + expect(workContents.length).to.be.at.least(2); // At least 2 working copies + }); + + it('should share one bare cache for multiple working copies', () => { + const cacheContents = fs.readdirSync('./.remote/cache'); + const gitProxyRepos = cacheContents.filter((name) => name.includes('git-proxy')); + + // Should be only one bare cache for git-proxy + expect(gitProxyRepos.length).to.equal(1); + }); + }); + + describe('Cache manager integration', () => { + it('should track cache statistics', () => { + const stats = cacheManager.getCacheStats(); + + expect(stats.totalRepositories).to.be.at.least(1); + expect(stats.repositories).to.be.an('array'); + expect(stats.repositories.length).to.be.at.least(1); + + const gitProxyRepo = stats.repositories.find((r) => r.name === 'git-proxy.git'); + expect(gitProxyRepo).to.exist; + expect(gitProxyRepo.sizeBytes).to.be.greaterThan(0); + expect(gitProxyRepo.lastAccessed).to.be.instanceOf(Date); + }); + }); + + describe('Cache cleanup', () => { + it('should remove working copy and enforce cache limits', async () => { + expect(fs.existsSync('./.remote')).to.be.true; + + const actionId = testData.cacheMissAction.id; + await clearBareClone(null, testData.cacheMissAction); + + expect(fs.existsSync(`./.remote/work/${actionId}`)).to.be.false; + expect(fs.existsSync('./.remote/cache')).to.be.true; + }); + }); +}); diff --git a/test/testConfig.test.js b/test/testConfig.test.js index c099dffea..76440158f 100644 --- a/test/testConfig.test.js +++ b/test/testConfig.test.js @@ -140,6 +140,19 @@ describe('user configuration', function () { expect(config.getRateLimit().limit).to.be.eql(limitConfig.rateLimit.limit); }); + it('should merge partial cache config with defaults', function () { + const user = { cache: { maxSizeGB: 5 } }; + fs.writeFileSync(tempUserFile, JSON.stringify(user)); + + const config = require('../src/config'); + config.invalidateCache(); + + const cacheConfig = config.getCacheConfig(); + expect(cacheConfig.maxSizeGB).to.be.eql(5); + 
expect(cacheConfig.maxRepositories).to.be.eql(defaultSettings.cache.maxRepositories); + expect(cacheConfig.cacheDir).to.be.eql(defaultSettings.cache.cacheDir); + }); + it('should override default settings for attestation config', function () { const user = { attestationConfig: {