Commit 6fbac34

🤖 feat: add support for Ollama local models
Integrates ollama-ai-provider-v2 to enable running AI models locally through Ollama without requiring API keys.

Changes:

- Add ollama-ai-provider-v2 dependency
- Implement Ollama provider in aiService.ts with lazy loading
- Add OllamaProviderOptions type for future extensibility
- Support Ollama model display formatting (e.g., llama3.2:7b -> Llama 3.2 (7B))
- Update providers.jsonc template with Ollama configuration example
- Add comprehensive Ollama documentation to models.md
- Add unit tests for Ollama model name formatting

Ollama is a local service that doesn't require API keys. Users can run any model from the Ollama Library (https://ollama.com/library) locally.

Example configuration in ~/.cmux/providers.jsonc:

{
  "ollama": {
    "baseUrl": "http://localhost:11434"
  }
}

Example model usage: ollama:llama3.2:7b

_Generated with `cmux`_
1 parent 7ca32f4 commit 6fbac34

File tree

- bun.lock
- docs/models.md
- package.json
- src/config.ts
- src/services/aiService.ts
- src/types/providerOptions.ts
- src/utils/ai/modelDisplay.test.ts
- src/utils/ai/modelDisplay.ts

8 files changed (+187, -10 lines)


bun.lock

Lines changed: 3 additions & 0 deletions
@@ -28,6 +28,7 @@
     "lru-cache": "^11.2.2",
     "markdown-it": "^14.1.0",
     "minimist": "^1.2.8",
+    "ollama-ai-provider-v2": "^1.5.3",
     "rehype-harden": "^1.1.5",
     "shescape": "^2.1.6",
     "source-map-support": "^0.5.21",
@@ -2238,6 +2239,8 @@

     "object.values": ["object.values@1.2.1", "", { "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.3", "define-properties": "^1.2.1", "es-object-atoms": "^1.0.0" } }, "sha512-gXah6aZrcUxjWg2zR2MwouP2eHlCBzdV4pygudehaKXSGW4v2AsRQUK+lwwXhii6KFZcunEnmSUoYp5CXibxtA=="],

+    "ollama-ai-provider-v2": ["ollama-ai-provider-v2@1.5.3", "", { "dependencies": { "@ai-sdk/provider": "^2.0.0", "@ai-sdk/provider-utils": "^3.0.7" }, "peerDependencies": { "zod": "^4.0.16" } }, "sha512-LnpvKuxNJyE+cB03cfUjFJnaiBJoUqz3X97GFc71gz09gOdrxNh1AsVBxrpw3uX5aiMxRIWPOZ8god0dHSChsg=="],
+
     "on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="],

     "once": ["once@1.4.0", "", { "dependencies": { "wrappy": "1" } }, "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w=="],

docs/models.md

Lines changed: 69 additions & 5 deletions
@@ -4,17 +4,81 @@ See also:

 - [System Prompt](./system-prompt.md)

-Currently we support the Sonnet 4 models and GPT-5 family of models:
+cmux supports multiple AI providers through its flexible provider architecture.
+
+### Supported Providers
+
+#### Anthropic (Cloud)
+
+Best supported provider with full feature support:

 - `anthropic:claude-sonnet-4-5`
 - `anthropic:claude-opus-4-1`
+
+#### OpenAI (Cloud)
+
+GPT-5 family of models:
+
 - `openai:gpt-5`
 - `openai:gpt-5-pro`
 - `openai:gpt-5-codex`

-And we intend to always support the models used by 90% of the community.
-
-Anthropic models are better supported than GPT-5 class models due to an outstanding issue in the
-Vercel AI SDK.
+**Note:** Anthropic models are better supported than GPT-5 class models due to an outstanding issue in the Vercel AI SDK.

 TODO: add issue link here.
+
+#### Ollama (Local)
+
+Run models locally with Ollama. No API key required:
+
+- `ollama:llama3.2:7b`
+- `ollama:llama3.2:13b`
+- `ollama:codellama:7b`
+- `ollama:qwen2.5:7b`
+- Any model from the [Ollama Library](https://ollama.com/library)
+
+**Setup:**
+
+1. Install Ollama from [ollama.com](https://ollama.com)
+2. Pull a model: `ollama pull llama3.2:7b`
+3. Configure in `~/.cmux/providers.jsonc`:
+
+```jsonc
+{
+  "ollama": {
+    // Default configuration - Ollama runs on localhost:11434
+    "baseUrl": "http://localhost:11434"
+  }
+}
+```
+
+For remote Ollama instances, update `baseUrl` to point to your server.
+
+### Provider Configuration
+
+All providers are configured in `~/.cmux/providers.jsonc`. See example configurations:
+
+```jsonc
+{
+  "anthropic": {
+    "apiKey": "sk-ant-..."
+  },
+  "openai": {
+    "apiKey": "sk-..."
+  },
+  "ollama": {
+    "baseUrl": "http://localhost:11434" // Default - only needed if different
+  }
+}
+```
+
+### Model Selection
+
+Use the Command Palette (`Cmd+Shift+P`) to switch models:
+
+1. Open Command Palette
+2. Type "model"
+3. Select "Change Model"
+4. Choose from available models
+
+Models are specified in the format: `provider:model-name`
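The `provider:model-name` format documented above is worth spelling out for Ollama, whose model IDs carry their own colon-separated tag (`llama3.2:7b`). A minimal sketch, using a hypothetical helper rather than cmux's actual parser, of splitting on the first colon only:

```ts
// Hypothetical helper (not part of this commit): the provider prefix is
// everything before the FIRST colon; the rest passes through as the model ID.
function splitModelString(modelString: string): { provider: string; modelId: string } {
  const idx = modelString.indexOf(":");
  if (idx === -1) {
    throw new Error(`Expected "provider:model-name", got "${modelString}"`);
  }
  return { provider: modelString.slice(0, idx), modelId: modelString.slice(idx + 1) };
}

splitModelString("ollama:llama3.2:7b");          // { provider: "ollama", modelId: "llama3.2:7b" }
splitModelString("anthropic:claude-sonnet-4-5"); // { provider: "anthropic", modelId: "claude-sonnet-4-5" }
```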

package.json

Lines changed: 1 addition & 0 deletions
@@ -69,6 +69,7 @@
     "lru-cache": "^11.2.2",
     "markdown-it": "^14.1.0",
     "minimist": "^1.2.8",
+    "ollama-ai-provider-v2": "^1.5.3",
     "rehype-harden": "^1.1.5",
     "shescape": "^2.1.6",
     "source-map-support": "^0.5.21",

src/config.ts

Lines changed: 7 additions & 2 deletions
@@ -426,8 +426,13 @@ export class Config {
 // Example:
 // {
 //   "anthropic": {
-//     "apiKey": "sk-...",
-//     "baseUrl": "https://api.anthropic.com"
+//     "apiKey": "sk-ant-..."
+//   },
+//   "openai": {
+//     "apiKey": "sk-..."
+//   },
+//   "ollama": {
+//     "baseUrl": "http://localhost:11434"
 //   }
 // }
 ${jsonString}`;
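To make the commented template above concrete, here is a minimal sketch of reading it back, assuming Node/Bun `fs` APIs and a naive whole-line comment strip; cmux's real `Config` class may parse JSONC differently:

```ts
import { readFileSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";

// Illustrative only: drops whole-line // comments before JSON.parse.
// Trailing comments (e.g. `"baseUrl": "..." // note`) would need a real JSONC parser.
function loadProvidersConfig(): Record<string, { apiKey?: string; baseUrl?: string }> {
  const raw = readFileSync(join(homedir(), ".cmux", "providers.jsonc"), "utf8");
  const withoutComments = raw
    .split("\n")
    .filter((line) => !line.trim().startsWith("//"))
    .join("\n");
  return JSON.parse(withoutComments) as Record<string, { apiKey?: string; baseUrl?: string }>;
}
```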

src/services/aiService.ts

Lines changed: 26 additions & 3 deletions
@@ -93,15 +93,19 @@ if (typeof globalFetchWithExtras.certificate === "function") {

 /**
  * Preload AI SDK provider modules to avoid race conditions in concurrent test environments.
- * This function loads @ai-sdk/anthropic and @ai-sdk/openai eagerly so that subsequent
- * dynamic imports in createModel() hit the module cache instead of racing.
+ * This function loads @ai-sdk/anthropic, @ai-sdk/openai, and ollama-ai-provider-v2 eagerly
+ * so that subsequent dynamic imports in createModel() hit the module cache instead of racing.
  *
  * In production, providers are lazy-loaded on first use to optimize startup time.
  * In tests, we preload them once during setup to ensure reliable concurrent execution.
  */
 export async function preloadAISDKProviders(): Promise<void> {
   // Preload providers to ensure they're in the module cache before concurrent tests run
-  await Promise.all([import("@ai-sdk/anthropic"), import("@ai-sdk/openai")]);
+  await Promise.all([
+    import("@ai-sdk/anthropic"),
+    import("@ai-sdk/openai"),
+    import("ollama-ai-provider-v2"),
+  ]);
 }

 export class AIService extends EventEmitter {
@@ -372,6 +376,25 @@ export class AIService extends EventEmitter {
       return Ok(model);
     }

+    // Handle Ollama provider
+    if (providerName === "ollama") {
+      // Ollama doesn't require API key - it's a local service
+      // Use custom fetch if provided, otherwise default with unlimited timeout
+      const baseFetch =
+        typeof providerConfig.fetch === "function"
+          ? (providerConfig.fetch as typeof fetch)
+          : defaultFetchWithUnlimitedTimeout;
+
+      // Lazy-load Ollama provider to reduce startup time
+      const { createOllama } = await import("ollama-ai-provider-v2");
+      const provider = createOllama({
+        ...providerConfig,
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment
+        fetch: baseFetch as any,
+      });
+      return Ok(provider(modelId));
+    }
+
     return Err({
       type: "provider_not_supported",
       provider: providerName,
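As a rough usage sketch (not cmux code), the lazily imported provider plugs straight into the Vercel AI SDK's `generateText`; `createOllama()` with no arguments is assumed to target a local Ollama instance on the default port:

```ts
import { generateText } from "ai";

async function askLocalModel(prompt: string): Promise<string> {
  // Same lazy import as createModel(); the module is cached after first use.
  const { createOllama } = await import("ollama-ai-provider-v2");
  const ollama = createOllama(); // assumed default: local Ollama on localhost:11434
  const { text } = await generateText({ model: ollama("llama3.2:7b"), prompt });
  return text;
}
```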

src/types/providerOptions.ts

Lines changed: 9 additions & 0 deletions
@@ -29,11 +29,20 @@ export interface OpenAIProviderOptions {
   simulateToolPolicyNoop?: boolean;
 }

+/**
+ * Ollama-specific options
+ * Currently empty - Ollama is a local service and doesn't require special options.
+ * This interface is provided for future extensibility.
+ */
+// eslint-disable-next-line @typescript-eslint/no-empty-object-type
+export interface OllamaProviderOptions {}
+
 /**
  * Cmux provider options - used by both frontend and backend
  */
 export interface CmuxProviderOptions {
   /** Provider-specific options */
   anthropic?: AnthropicProviderOptions;
   openai?: OpenAIProviderOptions;
+  ollama?: OllamaProviderOptions;
 }
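Because every field on `CmuxProviderOptions` is optional, the new slot composes like this (import path assumed for illustration):

```ts
import type { CmuxProviderOptions } from "./providerOptions";

const options: CmuxProviderOptions = {
  // Nothing to configure yet; the empty OllamaProviderOptions interface
  // simply reserves the slot for future Ollama-specific settings.
  ollama: {},
};
```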

src/utils/ai/modelDisplay.test.ts

Lines changed: 55 additions & 0 deletions
@@ -0,0 +1,55 @@
+import { describe, expect, test } from "bun:test";
+import { formatModelDisplayName } from "./modelDisplay";
+
+describe("formatModelDisplayName", () => {
+  describe("Claude models", () => {
+    test("formats Sonnet models", () => {
+      expect(formatModelDisplayName("claude-sonnet-4-5")).toBe("Sonnet 4.5");
+      expect(formatModelDisplayName("claude-sonnet-4")).toBe("Sonnet 4");
+    });
+
+    test("formats Opus models", () => {
+      expect(formatModelDisplayName("claude-opus-4-1")).toBe("Opus 4.1");
+    });
+  });
+
+  describe("GPT models", () => {
+    test("formats GPT models", () => {
+      expect(formatModelDisplayName("gpt-5-pro")).toBe("GPT-5 Pro");
+      expect(formatModelDisplayName("gpt-4o")).toBe("GPT-4o");
+      expect(formatModelDisplayName("gpt-4o-mini")).toBe("GPT-4o Mini");
+    });
+  });
+
+  describe("Gemini models", () => {
+    test("formats Gemini models", () => {
+      expect(formatModelDisplayName("gemini-2-0-flash-exp")).toBe("Gemini 2.0 Flash Exp");
+    });
+  });
+
+  describe("Ollama models", () => {
+    test("formats Llama models with size", () => {
+      expect(formatModelDisplayName("llama3.2:7b")).toBe("Llama 3.2 (7B)");
+      expect(formatModelDisplayName("llama3.2:13b")).toBe("Llama 3.2 (13B)");
+    });
+
+    test("formats Codellama models with size", () => {
+      expect(formatModelDisplayName("codellama:7b")).toBe("Codellama (7B)");
+      expect(formatModelDisplayName("codellama:13b")).toBe("Codellama (13B)");
+    });
+
+    test("formats Qwen models with size", () => {
+      expect(formatModelDisplayName("qwen2.5:7b")).toBe("Qwen 2.5 (7B)");
+    });
+
+    test("handles models without size suffix", () => {
+      expect(formatModelDisplayName("llama3")).toBe("Llama3");
+    });
+  });
+
+  describe("fallback formatting", () => {
+    test("capitalizes dash-separated parts", () => {
+      expect(formatModelDisplayName("custom-model-name")).toBe("Custom Model Name");
+    });
+  });
+});

src/utils/ai/modelDisplay.ts

Lines changed: 17 additions & 0 deletions
@@ -85,6 +85,23 @@ export function formatModelDisplayName(modelName: string): string {
     }
   }

+  // Ollama models - handle format like "llama3.2:7b" or "codellama:13b"
+  // Split by colon to handle quantization/size suffix
+  const [baseName, size] = modelName.split(":");
+  if (size) {
+    // "llama3.2:7b" -> "Llama 3.2 (7B)"
+    // "codellama:13b" -> "Codellama (13B)"
+    const formatted = baseName
+      .split(/(\d+\.?\d*)/)
+      .map((part, idx) => {
+        if (idx === 0) return capitalize(part);
+        if (/^\d+\.?\d*$/.test(part)) return ` ${part}`;
+        return part;
+      })
+      .join("");
+    return `${formatted.trim()} (${size.toUpperCase()})`;
+  }
+
   // Fallback: capitalize first letter of each dash-separated part
   return modelName.split("-").map(capitalize).join(" ");
 }
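Traced in isolation, the new branch behaves like the standalone sketch below (re-implemented for illustration; `capitalize` is assumed to uppercase the first letter, matching the helper used in modelDisplay.ts):

```ts
const capitalize = (s: string): string => (s ? s[0].toUpperCase() + s.slice(1) : s);

function formatOllamaName(modelName: string): string {
  const [baseName, size] = modelName.split(":");
  if (!size) return capitalize(baseName);
  // "llama3.2".split(/(\d+\.?\d*)/) -> ["llama", "3.2", ""]; the captured
  // version number is kept and prefixed with a space.
  const formatted = baseName
    .split(/(\d+\.?\d*)/)
    .map((part, idx) => {
      if (idx === 0) return capitalize(part);
      return /^\d+\.?\d*$/.test(part) ? ` ${part}` : part;
    })
    .join("");
  return `${formatted.trim()} (${size.toUpperCase()})`;
}

formatOllamaName("llama3.2:7b"); // "Llama 3.2 (7B)"
formatOllamaName("qwen2.5:7b");  // "Qwen 2.5 (7B)"
```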
