28 commits
6fbac34
🤖 feat: add support for Ollama local models
ammar-agent Nov 8, 2025
9021ffd
🤖 test: add Ollama integration tests with CI support
ammar-agent Nov 8, 2025
a268216
🤖 ci: add caching for Ollama models
ammar-agent Nov 8, 2025
472270c
🤖 fix: format docs/models.md
ammar-agent Nov 8, 2025
94d4aa9
🤖 refactor: address review comments
ammar-agent Nov 8, 2025
6f8976b
🤖 fix: resolve Ollama integration test timing issues
ammar-agent Nov 8, 2025
6d48ecf
🤖 style: format ollama test file
ammar-agent Nov 8, 2025
5df1cf8
🤖 refactor: cleanup and consistency improvements
ammar-agent Nov 8, 2025
4cd2491
🤖 refactor: remove unused variable from EventCollector
ammar-agent Nov 8, 2025
5081dce
🤖 perf: optimize Ollama CI caching to <5s startup
ammar-agent Nov 8, 2025
1b577db
🤖 feat: add multi-pattern lookup for Ollama model context limits
ammar-agent Nov 8, 2025
f997fbe
🤖 perf: use stable cache key for Ollama (v3 without workflow hash)
ammar-agent Nov 8, 2025
872c6df
🤖 ci: trigger CI after resolving Codex comment
ammar-agent Nov 8, 2025
7fa5c47
🤖 fix: set permissions on Ollama directory for cache saving
ammar-agent Nov 8, 2025
5a4978e
🤖 ci: test warm cache after v3 saved
ammar-agent Nov 8, 2025
c7b245d
🤖 refactor: cache models in /tmp to avoid permission issues
ammar-agent Nov 8, 2025
09015ae
🤖 refactor: simplify Ollama setup (remove caching complexity)
ammar-agent Nov 8, 2025
c10ffcb
🤖 refactor: create setup-ollama action with caching
ammar-agent Nov 8, 2025
4db87ae
🤖 fix: properly stop Ollama process after model pull
ammar-agent Nov 8, 2025
fecacc0
🤖 fix: use absolute path for Ollama cache (~ doesn't expand)
ammar-agent Nov 8, 2025
87a76a7
🤖 debug: add directory listing to verify cache contents
ammar-agent Nov 8, 2025
ffeec29
🤖 debug: check both possible model locations
ammar-agent Nov 8, 2025
2cc309c
🤖 debug: bump cache version to v3 for fresh test
ammar-agent Nov 8, 2025
dfaa011
🤖 debug: remove restore-keys to force cache miss
ammar-agent Nov 8, 2025
75d6c05
🤖 fix: copy models from system location to cacheable location
ammar-agent Nov 8, 2025
7f9c95e
🤖 refactor: simplify ollama caching with binary-only install
ammar-agent Nov 8, 2025
f82f5a7
🤖 fix: bump cache version to v2 to invalidate empty cache
ammar-agent Nov 8, 2025
ab90e9b
🤖 refactor: move Ollama model pull to test-side for better parallelism
ammar-agent Nov 8, 2025
65 changes: 65 additions & 0 deletions .github/actions/setup-ollama/action.yml
@@ -0,0 +1,65 @@
name: Setup Ollama
description: Install Ollama binary and restore model cache (tests pull models idempotently)

runs:
  using: composite
  steps:
    - name: Cache Ollama binary
      id: cache-ollama-binary
      uses: actions/cache@v4
      with:
        path: ./.ollama-install
        key: ${{ runner.os }}-ollama-binary-v2

    - name: Cache Ollama models
      id: cache-ollama-models
      uses: actions/cache@v4
      with:
        path: ~/.ollama
        key: ${{ runner.os }}-ollama-models-v2

    - name: Install Ollama binary (cache miss)
      if: steps.cache-ollama-binary.outputs.cache-hit != 'true'
      shell: bash
      run: |
        echo "Downloading Ollama binary..."
        ARCH=$(uname -m)
        case "$ARCH" in
          x86_64) ARCH="amd64" ;;
          aarch64|arm64) ARCH="arm64" ;;
          *) echo "Unsupported architecture: $ARCH"; exit 1 ;;
        esac
        curl -L https://ollama.com/download/ollama-linux-${ARCH}.tgz -o ollama.tgz
        mkdir -p .ollama-install
        tar -C .ollama-install -xzf ollama.tgz
        rm ollama.tgz
        echo "Ollama binary downloaded"

    - name: Add Ollama to PATH
      shell: bash
      run: |
        echo "$(pwd)/.ollama-install/bin" >> $GITHUB_PATH

    - name: Start Ollama server
      shell: bash
      run: |
        echo "Starting Ollama server..."
        ollama start &
        sleep 2
        echo "Ollama server started"

    - name: Verify Ollama
      shell: bash
      run: |
        ollama --version
        echo "Ollama binary ready - tests will pull models idempotently"

    - name: Verify cache status
      shell: bash
      run: |
        if [[ "${{ steps.cache-ollama-models.outputs.cache-hit }}" == "true" ]]; then
          echo "Model cache restored - available for tests"
          ls -lh "$HOME/.ollama" || echo "Warning: .ollama directory not found"
        else
          echo "Model cache miss - tests will pull models on first run"
        fi
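The composite action above deliberately stops at installing the binary and restoring `~/.ollama`; model pulls stay on the test side so warm caches make them near-instant. A minimal sketch of what an idempotent test-side pull could look like, using the standard Ollama REST endpoints (`/api/tags`, `/api/pull`) and the `OLLAMA_BASE_URL` variable the CI workflow exports; the `ensureModel` helper is illustrative and not part of this PR:

```typescript
// Illustrative helper: pull an Ollama model only if it is not already present locally.
// /api/tags lists models restored from the CI cache, so warm runs skip the network pull.
const OLLAMA_BASE = process.env.OLLAMA_BASE_URL ?? "http://localhost:11434/api";

async function ensureModel(model: string): Promise<void> {
  const res = await fetch(`${OLLAMA_BASE}/tags`, { signal: AbortSignal.timeout(5_000) });
  const { models } = (await res.json()) as { models: Array<{ name: string }> };
  if (models.some((m) => m.name === model)) return; // already cached, nothing to do

  // Cache miss: pull once; the next run takes the early return above.
  await fetch(`${OLLAMA_BASE}/pull`, {
    method: "POST",
    body: JSON.stringify({ model, stream: false }),
  });
}

await ensureModel("gpt-oss:20b");
```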
12 changes: 12 additions & 0 deletions .github/workflows/ci.yml
@@ -99,6 +99,17 @@ jobs:

      - uses: ./.github/actions/setup-cmux

      - name: Setup Ollama
        uses: ./.github/actions/setup-ollama

      # Ollama server started by setup-ollama action
      # Tests will pull models idempotently
      - name: Verify Ollama server
        run: |
          echo "Verifying Ollama server..."
          timeout 5 sh -c 'until curl -sf http://localhost:11434/api/tags > /dev/null 2>&1; do sleep 0.2; done'
          echo "Ollama ready - integration tests will pull models on demand"

      - name: Build worker files
        run: make build-main

@@ -108,6 +119,7 @@
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          OLLAMA_BASE_URL: http://localhost:11434/api

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v5
3 changes: 3 additions & 0 deletions bun.lock
@@ -28,6 +28,7 @@
"lru-cache": "^11.2.2",
"markdown-it": "^14.1.0",
"minimist": "^1.2.8",
"ollama-ai-provider-v2": "^1.5.3",
"rehype-harden": "^1.1.5",
"shescape": "^2.1.6",
"source-map-support": "^0.5.21",
@@ -2238,6 +2239,8 @@

"object.values": ["object.values@1.2.1", "", { "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.3", "define-properties": "^1.2.1", "es-object-atoms": "^1.0.0" } }, "sha512-gXah6aZrcUxjWg2zR2MwouP2eHlCBzdV4pygudehaKXSGW4v2AsRQUK+lwwXhii6KFZcunEnmSUoYp5CXibxtA=="],

"ollama-ai-provider-v2": ["ollama-ai-provider-v2@1.5.3", "", { "dependencies": { "@ai-sdk/provider": "^2.0.0", "@ai-sdk/provider-utils": "^3.0.7" }, "peerDependencies": { "zod": "^4.0.16" } }, "sha512-LnpvKuxNJyE+cB03cfUjFJnaiBJoUqz3X97GFc71gz09gOdrxNh1AsVBxrpw3uX5aiMxRIWPOZ8god0dHSChsg=="],

"on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="],

"once": ["once@1.4.0", "", { "dependencies": { "wrappy": "1" } }, "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w=="],
73 changes: 68 additions & 5 deletions docs/models.md
@@ -4,17 +4,80 @@ See also:

- [System Prompt](./system-prompt.md)

Currently we support the Sonnet 4 models and GPT-5 family of models:
cmux supports multiple AI providers through its flexible provider architecture.

### Supported Providers

#### Anthropic (Cloud)

The best-supported provider, with full feature coverage:

- `anthropic:claude-sonnet-4-5`
- `anthropic:claude-opus-4-1`

#### OpenAI (Cloud)

GPT-5 family of models:

- `openai:gpt-5`
- `openai:gpt-5-pro`
- `openai:gpt-5-codex`

And we intend to always support the models used by 90% of the community.

Anthropic models are better supported than GPT-5 class models due to an outstanding issue in the
Vercel AI SDK.
**Note:** Anthropic models are better supported than GPT-5 class models due to an outstanding issue in the Vercel AI SDK.

TODO: add issue link here.

#### Ollama (Local)

Run models locally with Ollama. No API key required:

- `ollama:gpt-oss:20b`
- `ollama:gpt-oss:120b`
- `ollama:qwen3-coder:30b`
- Any model from the [Ollama Library](https://ollama.com/library)

**Setup:**

1. Install Ollama from [ollama.com](https://ollama.com)
2. Pull a model: `ollama pull gpt-oss:20b`
3. Configure in `~/.cmux/providers.jsonc`:

```jsonc
{
  "ollama": {
    // Default configuration - Ollama runs on localhost:11434
    "baseUrl": "http://localhost:11434/api",
  },
}
```

For remote Ollama instances, update `baseUrl` to point to your server.
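A quick way to confirm the configured endpoint is reachable is to query `/api/tags`, the same route the project's CI health check polls; this snippet is only an illustration:

```typescript
// Lists locally pulled models; a failed request usually means Ollama isn't running
// or that baseUrl points at the wrong host or port.
const res = await fetch("http://localhost:11434/api/tags");
console.log(res.ok ? await res.json() : `Ollama not reachable: HTTP ${res.status}`);
```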

### Provider Configuration

All providers are configured in `~/.cmux/providers.jsonc`. See example configurations:

```jsonc
{
  "anthropic": {
    "apiKey": "sk-ant-...",
  },
  "openai": {
    "apiKey": "sk-...",
  },
  "ollama": {
    "baseUrl": "http://localhost:11434/api", // Default - only needed if different
  },
}
```

### Model Selection

Use the Command Palette (`Cmd+Shift+P`) to switch models:

1. Open Command Palette
2. Type "model"
3. Select "Change Model"
4. Choose from available models

Models are specified in the format: `provider:model-name`
1 change: 1 addition & 0 deletions package.json
@@ -69,6 +69,7 @@
"lru-cache": "^11.2.2",
"markdown-it": "^14.1.0",
"minimist": "^1.2.8",
"ollama-ai-provider-v2": "^1.5.3",
"rehype-harden": "^1.1.5",
"shescape": "^2.1.6",
"source-map-support": "^0.5.21",
9 changes: 7 additions & 2 deletions src/config.ts
@@ -426,8 +426,13 @@ export class Config {
    // Example:
    // {
    //   "anthropic": {
    //     "apiKey": "sk-...",
    //     "baseUrl": "https://api.anthropic.com"
    //     "apiKey": "sk-ant-..."
    //   },
    //   "openai": {
    //     "apiKey": "sk-..."
    //   },
    //   "ollama": {
    //     "baseUrl": "http://localhost:11434/api"
    //   }
    // }
${jsonString}`;
47 changes: 42 additions & 5 deletions src/services/aiService.ts
@@ -93,15 +93,19 @@ if (typeof globalFetchWithExtras.certificate === "function") {

/**
 * Preload AI SDK provider modules to avoid race conditions in concurrent test environments.
 * This function loads @ai-sdk/anthropic and @ai-sdk/openai eagerly so that subsequent
 * dynamic imports in createModel() hit the module cache instead of racing.
 * This function loads @ai-sdk/anthropic, @ai-sdk/openai, and ollama-ai-provider-v2 eagerly
 * so that subsequent dynamic imports in createModel() hit the module cache instead of racing.
 *
 * In production, providers are lazy-loaded on first use to optimize startup time.
 * In tests, we preload them once during setup to ensure reliable concurrent execution.
 */
export async function preloadAISDKProviders(): Promise<void> {
  // Preload providers to ensure they're in the module cache before concurrent tests run
  await Promise.all([import("@ai-sdk/anthropic"), import("@ai-sdk/openai")]);
  await Promise.all([
    import("@ai-sdk/anthropic"),
    import("@ai-sdk/openai"),
    import("ollama-ai-provider-v2"),
  ]);
}

export class AIService extends EventEmitter {
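As a usage sketch of the preload hook above (the file location and the `beforeAll` hook are assumptions, not part of this diff):

```typescript
// Illustrative test-setup file: preload provider modules once so concurrent tests that
// call createModel() hit the module cache instead of racing on dynamic imports.
import { beforeAll } from "bun:test";
import { preloadAISDKProviders } from "./services/aiService"; // import path is illustrative

beforeAll(async () => {
  await preloadAISDKProviders();
});
```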
@@ -228,7 +232,17 @@ export class AIService extends EventEmitter {
  ): Promise<Result<LanguageModel, SendMessageError>> {
    try {
      // Parse model string (format: "provider:model-id")
      const [providerName, modelId] = modelString.split(":");
      // Only split on the first colon to support model IDs with colons (e.g., "ollama:gpt-oss:20b")
      const colonIndex = modelString.indexOf(":");
      if (colonIndex === -1) {
        return Err({
          type: "invalid_model_string",
          message: `Invalid model string format: "${modelString}". Expected "provider:model-id"`,
        });
      }

      const providerName = modelString.slice(0, colonIndex);
      const modelId = modelString.slice(colonIndex + 1);

      if (!providerName || !modelId) {
        return Err({
@@ -372,6 +386,27 @@ export class AIService extends EventEmitter {
        return Ok(model);
      }

      // Handle Ollama provider
      if (providerName === "ollama") {
        // Ollama doesn't require an API key - it's a local service
        // Use custom fetch if provided, otherwise default with unlimited timeout
        const baseFetch =
          typeof providerConfig.fetch === "function"
            ? (providerConfig.fetch as typeof fetch)
            : defaultFetchWithUnlimitedTimeout;

        // Lazy-load the Ollama provider to reduce startup time
        const { createOllama } = await import("ollama-ai-provider-v2");
        const provider = createOllama({
          ...providerConfig,
          // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment
          fetch: baseFetch as any,
          // Use strict mode for better compatibility with the Ollama API
          compatibility: "strict",
        });
        return Ok(provider(modelId));
      }

      return Err({
        type: "provider_not_supported",
        provider: providerName,
@@ -433,7 +468,9 @@
    log.debug_obj(`${workspaceId}/1_original_messages.json`, messages);

    // Extract provider name from modelString (e.g., "anthropic:claude-opus-4-1" -> "anthropic")
    const [providerName] = modelString.split(":");
    // Use indexOf to handle model IDs with colons (e.g., "ollama:gpt-oss:20b")
    const colonIndex = modelString.indexOf(":");
    const providerName = colonIndex !== -1 ? modelString.slice(0, colonIndex) : modelString;

    // Get tool names early for mode transition sentinel (stub config, no workspace context needed)
    const earlyRuntime = createRuntime({ type: "local", srcBaseDir: process.cwd() });
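Both parsing changes above apply the same first-colon rule. A standalone sketch of that rule, with the `parseModelString` helper being illustrative rather than code from this PR:

```typescript
// First-colon parsing: the provider name never contains ":", but an Ollama model ID may.
function parseModelString(modelString: string): { provider: string; modelId: string } | null {
  const colonIndex = modelString.indexOf(":");
  if (colonIndex === -1) return null; // rejected upstream as invalid_model_string
  return {
    provider: modelString.slice(0, colonIndex),
    modelId: modelString.slice(colonIndex + 1), // may itself contain colons
  };
}

parseModelString("anthropic:claude-opus-4-1"); // { provider: "anthropic", modelId: "claude-opus-4-1" }
parseModelString("ollama:gpt-oss:20b"); // { provider: "ollama", modelId: "gpt-oss:20b" }
```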
3 changes: 1 addition & 2 deletions src/services/streamManager.ts
@@ -627,12 +627,11 @@ export class StreamManager extends EventEmitter {
        // Check if stream was cancelled BEFORE processing any parts
        // This improves interruption responsiveness by catching aborts earlier
        if (streamInfo.abortController.signal.aborted) {
          log.debug("streamManager: Stream aborted, breaking from loop");
          break;
        }

        // Log all stream parts to debug reasoning (commented out - too spammy)
        // log.debug("streamManager: Stream part", {
        // console.log("[DEBUG streamManager]: Stream part", {
        //   type: part.type,
        //   hasText: "text" in part,
        //   preview: "text" in part ? (part as StreamPartWithText).text?.substring(0, 50) : undefined,
9 changes: 9 additions & 0 deletions src/types/providerOptions.ts
@@ -29,11 +29,20 @@ export interface OpenAIProviderOptions {
  simulateToolPolicyNoop?: boolean;
}

/**
 * Ollama-specific options
 * Currently empty - Ollama is a local service and doesn't require special options.
 * This interface is provided for future extensibility.
 */
// eslint-disable-next-line @typescript-eslint/no-empty-object-type
export interface OllamaProviderOptions {}

/**
 * Cmux provider options - used by both frontend and backend
 */
export interface CmuxProviderOptions {
  /** Provider-specific options */
  anthropic?: AnthropicProviderOptions;
  openai?: OpenAIProviderOptions;
  ollama?: OllamaProviderOptions;
}
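For reference, a value of the extended options type might look like the following sketch; `OllamaProviderOptions` is intentionally empty today, so the empty object is just a placeholder:

```typescript
import type { CmuxProviderOptions } from "./providerOptions"; // import path is illustrative

// Ollama needs no provider-specific options yet; keeping the key present documents that
// local models are configured, while leaving room for future fields.
const providerOptions: CmuxProviderOptions = {
  ollama: {},
};
```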
55 changes: 55 additions & 0 deletions src/utils/ai/modelDisplay.test.ts
@@ -0,0 +1,55 @@
import { describe, expect, test } from "bun:test";
import { formatModelDisplayName } from "./modelDisplay";

describe("formatModelDisplayName", () => {
  describe("Claude models", () => {
    test("formats Sonnet models", () => {
      expect(formatModelDisplayName("claude-sonnet-4-5")).toBe("Sonnet 4.5");
      expect(formatModelDisplayName("claude-sonnet-4")).toBe("Sonnet 4");
    });

    test("formats Opus models", () => {
      expect(formatModelDisplayName("claude-opus-4-1")).toBe("Opus 4.1");
    });
  });

  describe("GPT models", () => {
    test("formats GPT models", () => {
      expect(formatModelDisplayName("gpt-5-pro")).toBe("GPT-5 Pro");
      expect(formatModelDisplayName("gpt-4o")).toBe("GPT-4o");
      expect(formatModelDisplayName("gpt-4o-mini")).toBe("GPT-4o Mini");
    });
  });

  describe("Gemini models", () => {
    test("formats Gemini models", () => {
      expect(formatModelDisplayName("gemini-2-0-flash-exp")).toBe("Gemini 2.0 Flash Exp");
    });
  });

  describe("Ollama models", () => {
    test("formats Llama models with size", () => {
      expect(formatModelDisplayName("llama3.2:7b")).toBe("Llama 3.2 (7B)");
      expect(formatModelDisplayName("llama3.2:13b")).toBe("Llama 3.2 (13B)");
    });

    test("formats Codellama models with size", () => {
      expect(formatModelDisplayName("codellama:7b")).toBe("Codellama (7B)");
      expect(formatModelDisplayName("codellama:13b")).toBe("Codellama (13B)");
    });

    test("formats Qwen models with size", () => {
      expect(formatModelDisplayName("qwen2.5:7b")).toBe("Qwen 2.5 (7B)");
    });

    test("handles models without size suffix", () => {
      expect(formatModelDisplayName("llama3")).toBe("Llama3");
    });
  });

  describe("fallback formatting", () => {
    test("capitalizes dash-separated parts", () => {
      expect(formatModelDisplayName("custom-model-name")).toBe("Custom Model Name");
    });
  });
});
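The Ollama cases above pin down how size-suffixed model IDs should render. A minimal sketch of logic that would satisfy just those cases; the real rules live in `modelDisplay.ts` and also cover the Claude, GPT, and Gemini families:

```typescript
// Illustrative only: split "name:size" on the first colon, space out a trailing version
// number in the name, and upper-case the size tag.
function formatOllamaDisplayName(modelId: string): string {
  const capitalize = (s: string) => s.charAt(0).toUpperCase() + s.slice(1);
  const colonIndex = modelId.indexOf(":");
  if (colonIndex === -1) return capitalize(modelId); // "llama3" -> "Llama3"

  const base = modelId.slice(0, colonIndex); // e.g. "llama3.2"
  const size = modelId.slice(colonIndex + 1); // e.g. "7b"

  // "llama3.2" -> "Llama 3.2"; names without a trailing version stay one word ("codellama" -> "Codellama")
  const match = /^([a-z]+)(\d+(?:\.\d+)*)$/.exec(base);
  const baseName = match ? `${capitalize(match[1])} ${match[2]}` : capitalize(base);

  return `${baseName} (${size.toUpperCase()})`; // "7b" -> "7B"
}
```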