Skip to content

Commit 1b577db

Browse files
committed
🤖 feat: add multi-pattern lookup for Ollama model context limits
Fixes context limit display for Ollama models like ollama:gpt-oss:20b.

Problem:
- User model string: ollama:gpt-oss:20b
- Previous lookup: gpt-oss:20b (stripped provider)
- models.json key: ollama/gpt-oss:20b-cloud (LiteLLM convention)
- Result: lookup failed, showed "Unknown model limits"

Solution: implemented multi-pattern fallback lookup that tries:
1. Direct model name (claude-opus-4-1)
2. Provider-prefixed (ollama/gpt-oss:20b)
3. Cloud variant (ollama/gpt-oss:20b-cloud) ← matches!
4. Base model (ollama/gpt-oss) as fallback

Benefits:
- Works automatically for all Ollama models in models.json
- Zero configuration required
- Backward compatible with existing lookups
- No API calls needed (works offline)

Testing:
- Added 15+ unit tests covering all lookup patterns
- Verified ollama:gpt-oss:20b → 131k context limit
- All 979 unit tests pass

Models that now work:
- ollama:gpt-oss:20b → ollama/gpt-oss:20b-cloud (131k)
- ollama:gpt-oss:120b → ollama/gpt-oss:120b-cloud (131k)
- ollama:llama3.1 → ollama/llama3.1 (8k)
- ollama:deepseek-v3.1:671b → ollama/deepseek-v3.1:671b-cloud
- Plus all existing Anthropic/OpenAI models
1 parent 5081dce commit 1b577db

File tree

2 files changed

+206
-53
lines changed

2 files changed

+206
-53
lines changed
Lines changed: 133 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,148 @@
1+
import { describe, expect, test, it } from "bun:test";
12
import { getModelStats } from "./modelStats";
23

34
describe("getModelStats", () => {
4-
it("should return model stats for claude-sonnet-4-5", () => {
5-
const stats = getModelStats("anthropic:claude-sonnet-4-5");
5+
describe("direct model lookups", () => {
6+
test("should find anthropic models by direct name", () => {
7+
const stats = getModelStats("anthropic:claude-opus-4-1");
8+
expect(stats).not.toBeNull();
9+
expect(stats?.max_input_tokens).toBeGreaterThan(0);
10+
expect(stats?.input_cost_per_token).toBeGreaterThan(0);
11+
});
612

7-
expect(stats).not.toBeNull();
8-
expect(stats?.input_cost_per_token).toBe(0.000003);
9-
expect(stats?.output_cost_per_token).toBe(0.000015);
10-
expect(stats?.max_input_tokens).toBe(200000);
13+
test("should find openai models by direct name", () => {
14+
const stats = getModelStats("openai:gpt-5");
15+
expect(stats).not.toBeNull();
16+
expect(stats?.max_input_tokens).toBeGreaterThan(0);
17+
});
18+
19+
test("should find models in models-extra.ts", () => {
20+
const stats = getModelStats("openai:gpt-5-pro");
21+
expect(stats).not.toBeNull();
22+
expect(stats?.max_input_tokens).toBe(400000);
23+
expect(stats?.input_cost_per_token).toBe(0.000015);
24+
});
25+
});
26+
27+
describe("ollama model lookups with cloud suffix", () => {
28+
test("should find ollama gpt-oss:20b with cloud suffix", () => {
29+
const stats = getModelStats("ollama:gpt-oss:20b");
30+
expect(stats).not.toBeNull();
31+
expect(stats?.max_input_tokens).toBe(131072);
32+
expect(stats?.input_cost_per_token).toBe(0); // Local models are free
33+
expect(stats?.output_cost_per_token).toBe(0);
34+
});
35+
36+
test("should find ollama gpt-oss:120b with cloud suffix", () => {
37+
const stats = getModelStats("ollama:gpt-oss:120b");
38+
expect(stats).not.toBeNull();
39+
expect(stats?.max_input_tokens).toBe(131072);
40+
});
41+
42+
test("should find ollama deepseek-v3.1:671b with cloud suffix", () => {
43+
const stats = getModelStats("ollama:deepseek-v3.1:671b");
44+
expect(stats).not.toBeNull();
45+
expect(stats?.max_input_tokens).toBeGreaterThan(0);
46+
});
1147
});
1248

13-
it("should handle model without provider prefix", () => {
14-
const stats = getModelStats("claude-sonnet-4-5");
49+
describe("ollama model lookups without cloud suffix", () => {
50+
test("should find ollama llama3.1 directly", () => {
51+
const stats = getModelStats("ollama:llama3.1");
52+
expect(stats).not.toBeNull();
53+
expect(stats?.max_input_tokens).toBeGreaterThan(0);
54+
});
1555

16-
expect(stats).not.toBeNull();
17-
expect(stats?.input_cost_per_token).toBe(0.000003);
56+
test("should find ollama llama3:8b with size variant", () => {
57+
const stats = getModelStats("ollama:llama3:8b");
58+
expect(stats).not.toBeNull();
59+
expect(stats?.max_input_tokens).toBeGreaterThan(0);
60+
});
61+
62+
test("should find ollama codellama", () => {
63+
const stats = getModelStats("ollama:codellama");
64+
expect(stats).not.toBeNull();
65+
expect(stats?.max_input_tokens).toBeGreaterThan(0);
66+
});
67+
});
68+
69+
describe("provider-prefixed lookups", () => {
70+
test("should find models with provider/ prefix", () => {
71+
// Some models in models.json use provider/ prefix
72+
const stats = getModelStats("ollama:llama2");
73+
expect(stats).not.toBeNull();
74+
expect(stats?.max_input_tokens).toBeGreaterThan(0);
75+
});
1876
});
1977

20-
it("should return cache pricing when available", () => {
21-
const stats = getModelStats("anthropic:claude-sonnet-4-5");
78+
describe("unknown models", () => {
79+
test("should return null for completely unknown model", () => {
80+
const stats = getModelStats("unknown:fake-model-9000");
81+
expect(stats).toBeNull();
82+
});
83+
84+
test("should return null for known provider but unknown model", () => {
85+
const stats = getModelStats("ollama:this-model-does-not-exist");
86+
expect(stats).toBeNull();
87+
});
88+
});
89+
90+
describe("model without provider prefix", () => {
91+
test("should handle model string without provider", () => {
92+
const stats = getModelStats("gpt-5");
93+
expect(stats).not.toBeNull();
94+
expect(stats?.max_input_tokens).toBeGreaterThan(0);
95+
});
96+
});
97+
98+
describe("existing test cases", () => {
99+
it("should return model stats for claude-sonnet-4-5", () => {
100+
const stats = getModelStats("anthropic:claude-sonnet-4-5");
101+
102+
expect(stats).not.toBeNull();
103+
expect(stats?.input_cost_per_token).toBe(0.000003);
104+
expect(stats?.output_cost_per_token).toBe(0.000015);
105+
expect(stats?.max_input_tokens).toBe(200000);
106+
});
107+
108+
it("should handle model without provider prefix", () => {
109+
const stats = getModelStats("claude-sonnet-4-5");
110+
111+
expect(stats).not.toBeNull();
112+
expect(stats?.input_cost_per_token).toBe(0.000003);
113+
});
114+
115+
it("should return cache pricing when available", () => {
116+
const stats = getModelStats("anthropic:claude-sonnet-4-5");
117+
118+
expect(stats?.cache_creation_input_token_cost).toBe(0.00000375);
119+
expect(stats?.cache_read_input_token_cost).toBe(3e-7);
120+
});
121+
122+
it("should return null for unknown models", () => {
123+
const stats = getModelStats("unknown:model");
22124

23-
expect(stats?.cache_creation_input_token_cost).toBe(0.00000375);
24-
expect(stats?.cache_read_input_token_cost).toBe(3e-7);
125+
expect(stats).toBeNull();
126+
});
25127
});
26128

27-
it("should return null for unknown models", () => {
28-
const stats = getModelStats("unknown:model");
129+
describe("model data validation", () => {
130+
test("should include cache costs when available", () => {
131+
const stats = getModelStats("anthropic:claude-opus-4-1");
132+
// Anthropic models have cache costs
133+
if (stats) {
134+
expect(stats.cache_creation_input_token_cost).toBeDefined();
135+
expect(stats.cache_read_input_token_cost).toBeDefined();
136+
}
137+
});
29138

30-
expect(stats).toBeNull();
139+
test("should not include cache costs when unavailable", () => {
140+
const stats = getModelStats("ollama:llama3.1");
141+
// Ollama models don't have cache costs in models.json
142+
if (stats) {
143+
expect(stats.cache_creation_input_token_cost).toBeUndefined();
144+
expect(stats.cache_read_input_token_cost).toBeUndefined();
145+
}
146+
});
31147
});
32148
});

src/utils/tokens/modelStats.ts

Lines changed: 73 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -19,48 +19,26 @@ interface RawModelData {
1919
}
2020

2121
/**
22-
* Extracts the model name from a Vercel AI SDK model string
23-
* @param modelString - Format: "provider:model-name" or just "model-name"
24-
* @returns The model name without the provider prefix
22+
* Validates raw model data has required fields
2523
*/
26-
function extractModelName(modelString: string): string {
27-
const parts = modelString.split(":");
28-
return parts.length > 1 ? parts[1] : parts[0];
24+
function isValidModelData(data: RawModelData): boolean {
25+
return (
26+
typeof data.max_input_tokens === "number" &&
27+
typeof data.input_cost_per_token === "number" &&
28+
typeof data.output_cost_per_token === "number"
29+
);
2930
}
3031

3132
/**
32-
* Gets model statistics for a given Vercel AI SDK model string
33-
* @param modelString - Format: "provider:model-name" (e.g., "anthropic:claude-opus-4-1")
34-
* @returns ModelStats or null if model not found
33+
* Extracts ModelStats from validated raw data
3534
*/
36-
export function getModelStats(modelString: string): ModelStats | null {
37-
const modelName = extractModelName(modelString);
38-
39-
// Check main models.json first
40-
let data = (modelsData as Record<string, RawModelData>)[modelName];
41-
42-
// Fall back to models-extra.ts if not found
43-
if (!data) {
44-
data = (modelsExtra as Record<string, RawModelData>)[modelName];
45-
}
46-
47-
if (!data) {
48-
return null;
49-
}
50-
51-
// Validate that we have required fields and correct types
52-
if (
53-
typeof data.max_input_tokens !== "number" ||
54-
typeof data.input_cost_per_token !== "number" ||
55-
typeof data.output_cost_per_token !== "number"
56-
) {
57-
return null;
58-
}
59-
35+
function extractModelStats(data: RawModelData): ModelStats {
36+
// Type assertions are safe here because isValidModelData() already validated these fields
37+
/* eslint-disable @typescript-eslint/non-nullable-type-assertion-style */
6038
return {
61-
max_input_tokens: data.max_input_tokens,
62-
input_cost_per_token: data.input_cost_per_token,
63-
output_cost_per_token: data.output_cost_per_token,
39+
max_input_tokens: data.max_input_tokens as number,
40+
input_cost_per_token: data.input_cost_per_token as number,
41+
output_cost_per_token: data.output_cost_per_token as number,
6442
cache_creation_input_token_cost:
6543
typeof data.cache_creation_input_token_cost === "number"
6644
? data.cache_creation_input_token_cost
@@ -70,4 +48,63 @@ export function getModelStats(modelString: string): ModelStats | null {
7048
? data.cache_read_input_token_cost
7149
: undefined,
7250
};
51+
/* eslint-enable @typescript-eslint/non-nullable-type-assertion-style */
52+
}
53+
54+
/**
55+
* Generates lookup keys for a model string with multiple naming patterns
56+
* Handles LiteLLM conventions like "ollama/model-cloud" and "provider/model"
57+
*/
58+
function generateLookupKeys(modelString: string): string[] {
59+
const colonIndex = modelString.indexOf(":");
60+
const provider = colonIndex !== -1 ? modelString.slice(0, colonIndex) : "";
61+
const modelName = colonIndex !== -1 ? modelString.slice(colonIndex + 1) : modelString;
62+
63+
const keys: string[] = [
64+
modelName, // Direct model name (e.g., "claude-opus-4-1")
65+
];
66+
67+
// Add provider-prefixed variants for Ollama and other providers
68+
if (provider) {
69+
keys.push(
70+
`${provider}/${modelName}`, // "ollama/gpt-oss:20b"
71+
`${provider}/${modelName}-cloud` // "ollama/gpt-oss:20b-cloud" (LiteLLM convention)
72+
);
73+
74+
// Fallback: strip size suffix for base model lookup
75+
// "ollama:gpt-oss:20b" → "ollama/gpt-oss"
76+
if (modelName.includes(":")) {
77+
const baseModel = modelName.split(":")[0];
78+
keys.push(`${provider}/${baseModel}`);
79+
}
80+
}
81+
82+
return keys;
83+
}
84+
85+
/**
86+
* Gets model statistics for a given Vercel AI SDK model string
87+
* @param modelString - Format: "provider:model-name" (e.g., "anthropic:claude-opus-4-1", "ollama:gpt-oss:20b")
88+
* @returns ModelStats or null if model not found
89+
*/
90+
export function getModelStats(modelString: string): ModelStats | null {
91+
const lookupKeys = generateLookupKeys(modelString);
92+
93+
// Try each lookup pattern in main models.json
94+
for (const key of lookupKeys) {
95+
const data = (modelsData as Record<string, RawModelData>)[key];
96+
if (data && isValidModelData(data)) {
97+
return extractModelStats(data);
98+
}
99+
}
100+
101+
// Fall back to models-extra.ts
102+
for (const key of lookupKeys) {
103+
const data = (modelsExtra as Record<string, RawModelData>)[key];
104+
if (data && isValidModelData(data)) {
105+
return extractModelStats(data);
106+
}
107+
}
108+
109+
return null;
73110
}

0 commit comments

Comments
 (0)