28 commits
6fbac34
🤖 feat: add support for Ollama local models
ammar-agent Nov 8, 2025
9021ffd
🤖 test: add Ollama integration tests with CI support
ammar-agent Nov 8, 2025
a268216
🤖 ci: add caching for Ollama models
ammar-agent Nov 8, 2025
472270c
🤖 fix: format docs/models.md
ammar-agent Nov 8, 2025
94d4aa9
🤖 refactor: address review comments
ammar-agent Nov 8, 2025
6f8976b
🤖 fix: resolve Ollama integration test timing issues
ammar-agent Nov 8, 2025
6d48ecf
🤖 style: format ollama test file
ammar-agent Nov 8, 2025
5df1cf8
🤖 refactor: cleanup and consistency improvements
ammar-agent Nov 8, 2025
4cd2491
🤖 refactor: remove unused variable from EventCollector
ammar-agent Nov 8, 2025
5081dce
🤖 perf: optimize Ollama CI caching to <5s startup
ammar-agent Nov 8, 2025
1b577db
🤖 feat: add multi-pattern lookup for Ollama model context limits
ammar-agent Nov 8, 2025
f997fbe
🤖 perf: use stable cache key for Ollama (v3 without workflow hash)
ammar-agent Nov 8, 2025
872c6df
🤖 ci: trigger CI after resolving Codex comment
ammar-agent Nov 8, 2025
7fa5c47
🤖 fix: set permissions on Ollama directory for cache saving
ammar-agent Nov 8, 2025
5a4978e
🤖 ci: test warm cache after v3 saved
ammar-agent Nov 8, 2025
c7b245d
🤖 refactor: cache models in /tmp to avoid permission issues
ammar-agent Nov 8, 2025
09015ae
🤖 refactor: simplify Ollama setup (remove caching complexity)
ammar-agent Nov 8, 2025
c10ffcb
🤖 refactor: create setup-ollama action with caching
ammar-agent Nov 8, 2025
4db87ae
🤖 fix: properly stop Ollama process after model pull
ammar-agent Nov 8, 2025
fecacc0
🤖 fix: use absolute path for Ollama cache (~ doesn't expand)
ammar-agent Nov 8, 2025
87a76a7
🤖 debug: add directory listing to verify cache contents
ammar-agent Nov 8, 2025
ffeec29
🤖 debug: check both possible model locations
ammar-agent Nov 8, 2025
2cc309c
🤖 debug: bump cache version to v3 for fresh test
ammar-agent Nov 8, 2025
dfaa011
🤖 debug: remove restore-keys to force cache miss
ammar-agent Nov 8, 2025
75d6c05
🤖 fix: copy models from system location to cacheable location
ammar-agent Nov 8, 2025
7f9c95e
🤖 refactor: simplify ollama caching with binary-only install
ammar-agent Nov 8, 2025
f82f5a7
🤖 fix: bump cache version to v2 to invalidate empty cache
ammar-agent Nov 8, 2025
ab90e9b
🤖 refactor: move Ollama model pull to test-side for better parallelism
ammar-agent Nov 8, 2025
65 changes: 65 additions & 0 deletions .github/actions/setup-ollama/action.yml
@@ -0,0 +1,65 @@
name: Setup Ollama
description: Install Ollama binary and restore model cache (tests pull models idempotently)

runs:
  using: composite
  steps:
    - name: Cache Ollama binary
      id: cache-ollama-binary
      uses: actions/cache@v4
      with:
        path: ./.ollama-install
        key: ${{ runner.os }}-ollama-binary-v2

    - name: Cache Ollama models
      id: cache-ollama-models
      uses: actions/cache@v4
      with:
        path: ~/.ollama
        key: ${{ runner.os }}-ollama-models-v2

    - name: Install Ollama binary (cache miss)
      if: steps.cache-ollama-binary.outputs.cache-hit != 'true'
      shell: bash
      run: |
        echo "Downloading Ollama binary..."
        ARCH=$(uname -m)
        case "$ARCH" in
          x86_64) ARCH="amd64" ;;
          aarch64|arm64) ARCH="arm64" ;;
          *) echo "Unsupported architecture: $ARCH"; exit 1 ;;
        esac
        curl -L https://ollama.com/download/ollama-linux-${ARCH}.tgz -o ollama.tgz
        mkdir -p .ollama-install
        tar -C .ollama-install -xzf ollama.tgz
        rm ollama.tgz
        echo "Ollama binary downloaded"

    - name: Add Ollama to PATH
      shell: bash
      run: |
        echo "$(pwd)/.ollama-install/bin" >> $GITHUB_PATH

    - name: Start Ollama server
      shell: bash
      run: |
        echo "Starting Ollama server..."
        ollama start &
        sleep 2
        echo "Ollama server started"

    - name: Verify Ollama
      shell: bash
      run: |
        ollama --version
        echo "Ollama binary ready - tests will pull models idempotently"

    - name: Verify cache status
      shell: bash
      run: |
        if [[ "${{ steps.cache-ollama-models.outputs.cache-hit }}" == "true" ]]; then
          echo "Model cache restored - available for tests"
          ls -lh "$HOME/.ollama" || echo "Warning: .ollama directory not found"
        else
          echo "Model cache miss - tests will pull models on first run"
        fi
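The composite action above deliberately stops at installing the binary and restoring `~/.ollama`; model pulls stay on the test side so warm caches make them near-instant. A minimal sketch of what an idempotent test-side pull could look like, using the standard Ollama REST endpoints (`/api/tags`, `/api/pull`) and the `OLLAMA_BASE_URL` variable the CI workflow exports; the `ensureModel` helper is illustrative and not part of this PR:

```typescript
// Illustrative helper: pull an Ollama model only if it is not already present locally.
// /api/tags lists models restored from the CI cache, so warm runs skip the network pull.
const OLLAMA_BASE = process.env.OLLAMA_BASE_URL ?? "http://localhost:11434/api";

async function ensureModel(model: string): Promise<void> {
  const res = await fetch(`${OLLAMA_BASE}/tags`, { signal: AbortSignal.timeout(5_000) });
  const { models } = (await res.json()) as { models: Array<{ name: string }> };
  if (models.some((m) => m.name === model)) return; // already cached, nothing to do

  // Cache miss: pull once; the next run takes the early return above.
  await fetch(`${OLLAMA_BASE}/pull`, {
    method: "POST",
    body: JSON.stringify({ model, stream: false }),
  });
}

await ensureModel("gpt-oss:20b");
```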
12 changes: 12 additions & 0 deletions .github/workflows/ci.yml
@@ -99,6 +99,17 @@ jobs:

      - uses: ./.github/actions/setup-cmux

      - name: Setup Ollama
        uses: ./.github/actions/setup-ollama

      # Ollama server started by setup-ollama action
      # Tests will pull models idempotently
      - name: Verify Ollama server
        run: |
          echo "Verifying Ollama server..."
          timeout 5 sh -c 'until curl -sf http://localhost:11434/api/tags > /dev/null 2>&1; do sleep 0.2; done'
          echo "Ollama ready - integration tests will pull models on demand"

      - name: Build worker files
        run: make build-main

@@ -108,6 +119,7 @@
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          OLLAMA_BASE_URL: http://localhost:11434/api

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v5
3 changes: 3 additions & 0 deletions bun.lock
@@ -28,6 +28,7 @@
"lru-cache": "^11.2.2",
"markdown-it": "^14.1.0",
"minimist": "^1.2.8",
"ollama-ai-provider-v2": "^1.5.3",
"rehype-harden": "^1.1.5",
"shescape": "^2.1.6",
"source-map-support": "^0.5.21",
@@ -2238,6 +2239,8 @@

"object.values": ["object.values@1.2.1", "", { "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.3", "define-properties": "^1.2.1", "es-object-atoms": "^1.0.0" } }, "sha512-gXah6aZrcUxjWg2zR2MwouP2eHlCBzdV4pygudehaKXSGW4v2AsRQUK+lwwXhii6KFZcunEnmSUoYp5CXibxtA=="],

"ollama-ai-provider-v2": ["ollama-ai-provider-v2@1.5.3", "", { "dependencies": { "@ai-sdk/provider": "^2.0.0", "@ai-sdk/provider-utils": "^3.0.7" }, "peerDependencies": { "zod": "^4.0.16" } }, "sha512-LnpvKuxNJyE+cB03cfUjFJnaiBJoUqz3X97GFc71gz09gOdrxNh1AsVBxrpw3uX5aiMxRIWPOZ8god0dHSChsg=="],

"on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="],

"once": ["once@1.4.0", "", { "dependencies": { "wrappy": "1" } }, "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w=="],
73 changes: 68 additions & 5 deletions docs/models.md
@@ -4,17 +4,80 @@ See also:

- [System Prompt](./system-prompt.md)

Currently we support the Sonnet 4 models and GPT-5 family of models:
cmux supports multiple AI providers through its flexible provider architecture.

### Supported Providers

#### Anthropic (Cloud)

The best-supported provider, with full feature coverage:

- `anthropic:claude-sonnet-4-5`
- `anthropic:claude-opus-4-1`

#### OpenAI (Cloud)

GPT-5 family of models:

- `openai:gpt-5`
- `openai:gpt-5-pro`
- `openai:gpt-5-codex`

And we intend to always support the models used by 90% of the community.

Anthropic models are better supported than GPT-5 class models due to an outstanding issue in the
Vercel AI SDK.
**Note:** Anthropic models are better supported than GPT-5 class models due to an outstanding issue in the Vercel AI SDK.

TODO: add issue link here.

#### Ollama (Local)

Run models locally with Ollama. No API key required:

- `ollama:gpt-oss:20b`
- `ollama:gpt-oss:120b`
- `ollama:qwen3-coder:30b`
- Any model from the [Ollama Library](https://ollama.com/library)

**Setup:**

1. Install Ollama from [ollama.com](https://ollama.com)
2. Pull a model: `ollama pull gpt-oss:20b`
3. Configure in `~/.cmux/providers.jsonc`:

```jsonc
{
  "ollama": {
    // Default configuration - Ollama runs on localhost:11434
    "baseUrl": "http://localhost:11434/api",
  },
}
```

For remote Ollama instances, update `baseUrl` to point to your server.
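A quick way to confirm the configured endpoint is reachable is to query `/api/tags`, the same route the project's CI health check polls; this snippet is only an illustration:

```typescript
// Lists locally pulled models; a failed request usually means Ollama isn't running
// or that baseUrl points at the wrong host or port.
const res = await fetch("http://localhost:11434/api/tags");
console.log(res.ok ? await res.json() : `Ollama not reachable: HTTP ${res.status}`);
```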

### Provider Configuration

All providers are configured in `~/.cmux/providers.jsonc`. See example configurations:

```jsonc
{
  "anthropic": {
    "apiKey": "sk-ant-...",
  },
  "openai": {
    "apiKey": "sk-...",
  },
  "ollama": {
    "baseUrl": "http://localhost:11434/api", // Default - only needed if different
  },
}
```

### Model Selection

Use the Command Palette (`Cmd+Shift+P`) to switch models:

1. Open Command Palette
2. Type "model"
3. Select "Change Model"
4. Choose from available models

Models are specified in the format: `provider:model-name`
1 change: 1 addition & 0 deletions package.json
@@ -69,6 +69,7 @@
"lru-cache": "^11.2.2",
"markdown-it": "^14.1.0",
"minimist": "^1.2.8",
"ollama-ai-provider-v2": "^1.5.3",
"rehype-harden": "^1.1.5",
"shescape": "^2.1.6",
"source-map-support": "^0.5.21",
9 changes: 7 additions & 2 deletions src/config.ts
@@ -426,8 +426,13 @@ export class Config {
    // Example:
    // {
    //   "anthropic": {
    //     "apiKey": "sk-...",
    //     "baseUrl": "https://api.anthropic.com"
    //     "apiKey": "sk-ant-..."
    //   },
    //   "openai": {
    //     "apiKey": "sk-..."
    //   },
    //   "ollama": {
    //     "baseUrl": "http://localhost:11434/api"
    //   }
    // }
${jsonString}`;
47 changes: 42 additions & 5 deletions src/services/aiService.ts
@@ -93,15 +93,19 @@ if (typeof globalFetchWithExtras.certificate === "function") {

/**
 * Preload AI SDK provider modules to avoid race conditions in concurrent test environments.
 * This function loads @ai-sdk/anthropic and @ai-sdk/openai eagerly so that subsequent
 * dynamic imports in createModel() hit the module cache instead of racing.
 * This function loads @ai-sdk/anthropic, @ai-sdk/openai, and ollama-ai-provider-v2 eagerly
 * so that subsequent dynamic imports in createModel() hit the module cache instead of racing.
 *
 * In production, providers are lazy-loaded on first use to optimize startup time.
 * In tests, we preload them once during setup to ensure reliable concurrent execution.
 */
export async function preloadAISDKProviders(): Promise<void> {
  // Preload providers to ensure they're in the module cache before concurrent tests run
  await Promise.all([import("@ai-sdk/anthropic"), import("@ai-sdk/openai")]);
  await Promise.all([
    import("@ai-sdk/anthropic"),
    import("@ai-sdk/openai"),
    import("ollama-ai-provider-v2"),
  ]);
}

export class AIService extends EventEmitter {
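As a usage sketch of the preload hook above (the file location and the `beforeAll` hook are assumptions, not part of this diff):

```typescript
// Illustrative test-setup file: preload provider modules once so concurrent tests that
// call createModel() hit the module cache instead of racing on dynamic imports.
import { beforeAll } from "bun:test";
import { preloadAISDKProviders } from "./services/aiService"; // import path is illustrative

beforeAll(async () => {
  await preloadAISDKProviders();
});
```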
@@ -228,7 +232,17 @@ export class AIService extends EventEmitter {
  ): Promise<Result<LanguageModel, SendMessageError>> {
    try {
      // Parse model string (format: "provider:model-id")
      const [providerName, modelId] = modelString.split(":");
      // Only split on the first colon to support model IDs with colons (e.g., "ollama:gpt-oss:20b")
      const colonIndex = modelString.indexOf(":");
      if (colonIndex === -1) {
        return Err({
          type: "invalid_model_string",
          message: `Invalid model string format: "${modelString}". Expected "provider:model-id"`,
        });
      }

      const providerName = modelString.slice(0, colonIndex);
      const modelId = modelString.slice(colonIndex + 1);

      if (!providerName || !modelId) {
        return Err({
@@ -372,6 +386,27 @@ export class AIService extends EventEmitter {
        return Ok(model);
      }

      // Handle Ollama provider
      if (providerName === "ollama") {
        // Ollama doesn't require an API key - it's a local service
        // Use custom fetch if provided, otherwise default with unlimited timeout
        const baseFetch =
          typeof providerConfig.fetch === "function"
            ? (providerConfig.fetch as typeof fetch)
            : defaultFetchWithUnlimitedTimeout;

        // Lazy-load the Ollama provider to reduce startup time
        const { createOllama } = await import("ollama-ai-provider-v2");
        const provider = createOllama({
          ...providerConfig,
          // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment
          fetch: baseFetch as any,
          // Use strict mode for better compatibility with the Ollama API
          compatibility: "strict",
        });
        return Ok(provider(modelId));
      }

      return Err({
        type: "provider_not_supported",
        provider: providerName,
@@ -433,7 +468,9 @@
    log.debug_obj(`${workspaceId}/1_original_messages.json`, messages);

    // Extract provider name from modelString (e.g., "anthropic:claude-opus-4-1" -> "anthropic")
    const [providerName] = modelString.split(":");
    // Use indexOf to handle model IDs with colons (e.g., "ollama:gpt-oss:20b")
    const colonIndex = modelString.indexOf(":");
    const providerName = colonIndex !== -1 ? modelString.slice(0, colonIndex) : modelString;

    // Get tool names early for mode transition sentinel (stub config, no workspace context needed)
    const earlyRuntime = createRuntime({ type: "local", srcBaseDir: process.cwd() });
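Both parsing changes above apply the same first-colon rule. A standalone sketch of that rule, with the `parseModelString` helper being illustrative rather than code from this PR:

```typescript
// First-colon parsing: the provider name never contains ":", but an Ollama model ID may.
function parseModelString(modelString: string): { provider: string; modelId: string } | null {
  const colonIndex = modelString.indexOf(":");
  if (colonIndex === -1) return null; // rejected upstream as invalid_model_string
  return {
    provider: modelString.slice(0, colonIndex),
    modelId: modelString.slice(colonIndex + 1), // may itself contain colons
  };
}

parseModelString("anthropic:claude-opus-4-1"); // { provider: "anthropic", modelId: "claude-opus-4-1" }
parseModelString("ollama:gpt-oss:20b"); // { provider: "ollama", modelId: "gpt-oss:20b" }
```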
3 changes: 1 addition & 2 deletions src/services/streamManager.ts
@@ -627,12 +627,11 @@ export class StreamManager extends EventEmitter {
        // Check if stream was cancelled BEFORE processing any parts
        // This improves interruption responsiveness by catching aborts earlier
        if (streamInfo.abortController.signal.aborted) {
          log.debug("streamManager: Stream aborted, breaking from loop");
          break;
        }

        // Log all stream parts to debug reasoning (commented out - too spammy)
        // log.debug("streamManager: Stream part", {
        // console.log("[DEBUG streamManager]: Stream part", {
        //   type: part.type,
        //   hasText: "text" in part,
        //   preview: "text" in part ? (part as StreamPartWithText).text?.substring(0, 50) : undefined,
9 changes: 9 additions & 0 deletions src/types/providerOptions.ts
@@ -29,11 +29,20 @@ export interface OpenAIProviderOptions {
  simulateToolPolicyNoop?: boolean;
}

/**
 * Ollama-specific options
 * Currently empty - Ollama is a local service and doesn't require special options.
 * This interface is provided for future extensibility.
 */
// eslint-disable-next-line @typescript-eslint/no-empty-object-type
export interface OllamaProviderOptions {}

/**
 * Cmux provider options - used by both frontend and backend
 */
export interface CmuxProviderOptions {
  /** Provider-specific options */
  anthropic?: AnthropicProviderOptions;
  openai?: OpenAIProviderOptions;
  ollama?: OllamaProviderOptions;
}
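For reference, a value of the extended options type might look like the following sketch; `OllamaProviderOptions` is intentionally empty today, so the empty object is just a placeholder:

```typescript
import type { CmuxProviderOptions } from "./providerOptions"; // import path is illustrative

// Ollama needs no provider-specific options yet; keeping the key present documents that
// local models are configured, while leaving room for future fields.
const providerOptions: CmuxProviderOptions = {
  ollama: {},
};
```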
55 changes: 55 additions & 0 deletions src/utils/ai/modelDisplay.test.ts
@@ -0,0 +1,55 @@
import { describe, expect, test } from "bun:test";
import { formatModelDisplayName } from "./modelDisplay";

describe("formatModelDisplayName", () => {
  describe("Claude models", () => {
    test("formats Sonnet models", () => {
      expect(formatModelDisplayName("claude-sonnet-4-5")).toBe("Sonnet 4.5");
      expect(formatModelDisplayName("claude-sonnet-4")).toBe("Sonnet 4");
    });

    test("formats Opus models", () => {
      expect(formatModelDisplayName("claude-opus-4-1")).toBe("Opus 4.1");
    });
  });

  describe("GPT models", () => {
    test("formats GPT models", () => {
      expect(formatModelDisplayName("gpt-5-pro")).toBe("GPT-5 Pro");
      expect(formatModelDisplayName("gpt-4o")).toBe("GPT-4o");
      expect(formatModelDisplayName("gpt-4o-mini")).toBe("GPT-4o Mini");
    });
  });

  describe("Gemini models", () => {
    test("formats Gemini models", () => {
      expect(formatModelDisplayName("gemini-2-0-flash-exp")).toBe("Gemini 2.0 Flash Exp");
    });
  });

  describe("Ollama models", () => {
    test("formats Llama models with size", () => {
      expect(formatModelDisplayName("llama3.2:7b")).toBe("Llama 3.2 (7B)");
      expect(formatModelDisplayName("llama3.2:13b")).toBe("Llama 3.2 (13B)");
    });

    test("formats Codellama models with size", () => {
      expect(formatModelDisplayName("codellama:7b")).toBe("Codellama (7B)");
      expect(formatModelDisplayName("codellama:13b")).toBe("Codellama (13B)");
    });

    test("formats Qwen models with size", () => {
      expect(formatModelDisplayName("qwen2.5:7b")).toBe("Qwen 2.5 (7B)");
    });

    test("handles models without size suffix", () => {
      expect(formatModelDisplayName("llama3")).toBe("Llama3");
    });
  });

  describe("fallback formatting", () => {
    test("capitalizes dash-separated parts", () => {
      expect(formatModelDisplayName("custom-model-name")).toBe("Custom Model Name");
    });
  });
});
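The Ollama cases above pin down how size-suffixed model IDs should render. A minimal sketch of logic that would satisfy just those cases; the real rules live in `modelDisplay.ts` and also cover the Claude, GPT, and Gemini families:

```typescript
// Illustrative only: split "name:size" on the first colon, space out a trailing version
// number in the name, and upper-case the size tag.
function formatOllamaDisplayName(modelId: string): string {
  const capitalize = (s: string) => s.charAt(0).toUpperCase() + s.slice(1);
  const colonIndex = modelId.indexOf(":");
  if (colonIndex === -1) return capitalize(modelId); // "llama3" -> "Llama3"

  const base = modelId.slice(0, colonIndex); // e.g. "llama3.2"
  const size = modelId.slice(colonIndex + 1); // e.g. "7b"

  // "llama3.2" -> "Llama 3.2"; names without a trailing version stay one word ("codellama" -> "Codellama")
  const match = /^([a-z]+)(\d+(?:\.\d+)*)$/.exec(base);
  const baseName = match ? `${capitalize(match[1])} ${match[2]}` : capitalize(base);

  return `${baseName} (${size.toUpperCase()})`; // "7b" -> "7B"
}
```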