Skip to content

Commit 1b577db

Browse files
committed
🤖 feat: add multi-pattern lookup for Ollama model context limits
Fixes context limit display for Ollama models like ollama:gpt-oss:20b.

Problem:
- User model string: ollama:gpt-oss:20b
- Previous lookup: gpt-oss:20b (stripped provider)
- models.json key: ollama/gpt-oss:20b-cloud (LiteLLM convention)
- Result: lookup failed, showed "Unknown model limits"

Solution: implemented multi-pattern fallback lookup that tries:
1. Direct model name (claude-opus-4-1)
2. Provider-prefixed (ollama/gpt-oss:20b)
3. Cloud variant (ollama/gpt-oss:20b-cloud) ← matches!
4. Base model (ollama/gpt-oss) as fallback

Benefits:
- Works automatically for all Ollama models in models.json
- Zero configuration required
- Backward compatible with existing lookups
- No API calls needed (works offline)

Testing:
- Added 15+ unit tests covering all lookup patterns
- Verified ollama:gpt-oss:20b → 131k context limit
- All 979 unit tests pass

Models that now work:
- ollama:gpt-oss:20b → ollama/gpt-oss:20b-cloud (131k)
- ollama:gpt-oss:120b → ollama/gpt-oss:120b-cloud (131k)
- ollama:llama3.1 → ollama/llama3.1 (8k)
- ollama:deepseek-v3.1:671b → ollama/deepseek-v3.1:671b-cloud
- Plus all existing Anthropic/OpenAI models
1 parent 5081dce commit 1b577db

File tree

2 files changed

+206
-53
lines changed

2 files changed

+206
-53
lines changed
Lines changed: 133 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,148 @@
1+
import { describe, expect, test, it } from "bun:test";
12
import { getModelStats } from "./modelStats";
23

34
describe("getModelStats", () => {
4-
it("should return model stats for claude-sonnet-4-5", () => {
5-
const stats = getModelStats("anthropic:claude-sonnet-4-5");
5+
describe("direct model lookups", () => {
6+
test("should find anthropic models by direct name", () => {
7+
const stats = getModelStats("anthropic:claude-opus-4-1");
8+
expect(stats).not.toBeNull();
9+
expect(stats?.max_input_tokens).toBeGreaterThan(0);
10+
expect(stats?.input_cost_per_token).toBeGreaterThan(0);
11+
});
612

7-
expect(stats).not.toBeNull();
8-
expect(stats?.input_cost_per_token).toBe(0.000003);
9-
expect(stats?.output_cost_per_token).toBe(0.000015);
10-
expect(stats?.max_input_tokens).toBe(200000);
13+
test("should find openai models by direct name", () => {
14+
const stats = getModelStats("openai:gpt-5");
15+
expect(stats).not.toBeNull();
16+
expect(stats?.max_input_tokens).toBeGreaterThan(0);
17+
});
18+
19+
test("should find models in models-extra.ts", () => {
20+
const stats = getModelStats("openai:gpt-5-pro");
21+
expect(stats).not.toBeNull();
22+
expect(stats?.max_input_tokens).toBe(400000);
23+
expect(stats?.input_cost_per_token).toBe(0.000015);
24+
});
25+
});
26+
27+
describe("ollama model lookups with cloud suffix", () => {
28+
test("should find ollama gpt-oss:20b with cloud suffix", () => {
29+
const stats = getModelStats("ollama:gpt-oss:20b");
30+
expect(stats).not.toBeNull();
31+
expect(stats?.max_input_tokens).toBe(131072);
32+
expect(stats?.input_cost_per_token).toBe(0); // Local models are free
33+
expect(stats?.output_cost_per_token).toBe(0);
34+
});
35+
36+
test("should find ollama gpt-oss:120b with cloud suffix", () => {
37+
const stats = getModelStats("ollama:gpt-oss:120b");
38+
expect(stats).not.toBeNull();
39+
expect(stats?.max_input_tokens).toBe(131072);
40+
});
41+
42+
test("should find ollama deepseek-v3.1:671b with cloud suffix", () => {
43+
const stats = getModelStats("ollama:deepseek-v3.1:671b");
44+
expect(stats).not.toBeNull();
45+
expect(stats?.max_input_tokens).toBeGreaterThan(0);
46+
});
1147
});
1248

13-
it("should handle model without provider prefix", () => {
14-
const stats = getModelStats("claude-sonnet-4-5");
49+
describe("ollama model lookups without cloud suffix", () => {
50+
test("should find ollama llama3.1 directly", () => {
51+
const stats = getModelStats("ollama:llama3.1");
52+
expect(stats).not.toBeNull();
53+
expect(stats?.max_input_tokens).toBeGreaterThan(0);
54+
});
1555

16-
expect(stats).not.toBeNull();
17-
expect(stats?.input_cost_per_token).toBe(0.000003);
56+
test("should find ollama llama3:8b with size variant", () => {
57+
const stats = getModelStats("ollama:llama3:8b");
58+
expect(stats).not.toBeNull();
59+
expect(stats?.max_input_tokens).toBeGreaterThan(0);
60+
});
61+
62+
test("should find ollama codellama", () => {
63+
const stats = getModelStats("ollama:codellama");
64+
expect(stats).not.toBeNull();
65+
expect(stats?.max_input_tokens).toBeGreaterThan(0);
66+
});
67+
});
68+
69+
describe("provider-prefixed lookups", () => {
70+
test("should find models with provider/ prefix", () => {
71+
// Some models in models.json use provider/ prefix
72+
const stats = getModelStats("ollama:llama2");
73+
expect(stats).not.toBeNull();
74+
expect(stats?.max_input_tokens).toBeGreaterThan(0);
75+
});
1876
});
1977

20-
it("should return cache pricing when available", () => {
21-
const stats = getModelStats("anthropic:claude-sonnet-4-5");
78+
describe("unknown models", () => {
79+
test("should return null for completely unknown model", () => {
80+
const stats = getModelStats("unknown:fake-model-9000");
81+
expect(stats).toBeNull();
82+
});
83+
84+
test("should return null for known provider but unknown model", () => {
85+
const stats = getModelStats("ollama:this-model-does-not-exist");
86+
expect(stats).toBeNull();
87+
});
88+
});
89+
90+
describe("model without provider prefix", () => {
91+
test("should handle model string without provider", () => {
92+
const stats = getModelStats("gpt-5");
93+
expect(stats).not.toBeNull();
94+
expect(stats?.max_input_tokens).toBeGreaterThan(0);
95+
});
96+
});
97+
98+
describe("existing test cases", () => {
99+
it("should return model stats for claude-sonnet-4-5", () => {
100+
const stats = getModelStats("anthropic:claude-sonnet-4-5");
101+
102+
expect(stats).not.toBeNull();
103+
expect(stats?.input_cost_per_token).toBe(0.000003);
104+
expect(stats?.output_cost_per_token).toBe(0.000015);
105+
expect(stats?.max_input_tokens).toBe(200000);
106+
});
107+
108+
it("should handle model without provider prefix", () => {
109+
const stats = getModelStats("claude-sonnet-4-5");
110+
111+
expect(stats).not.toBeNull();
112+
expect(stats?.input_cost_per_token).toBe(0.000003);
113+
});
114+
115+
it("should return cache pricing when available", () => {
116+
const stats = getModelStats("anthropic:claude-sonnet-4-5");
117+
118+
expect(stats?.cache_creation_input_token_cost).toBe(0.00000375);
119+
expect(stats?.cache_read_input_token_cost).toBe(3e-7);
120+
});
121+
122+
it("should return null for unknown models", () => {
123+
const stats = getModelStats("unknown:model");
22124

23-
expect(stats?.cache_creation_input_token_cost).toBe(0.00000375);
24-
expect(stats?.cache_read_input_token_cost).toBe(3e-7);
125+
expect(stats).toBeNull();
126+
});
25127
});
26128

27-
it("should return null for unknown models", () => {
28-
const stats = getModelStats("unknown:model");
129+
describe("model data validation", () => {
130+
test("should include cache costs when available", () => {
131+
const stats = getModelStats("anthropic:claude-opus-4-1");
132+
// Anthropic models have cache costs
133+
if (stats) {
134+
expect(stats.cache_creation_input_token_cost).toBeDefined();
135+
expect(stats.cache_read_input_token_cost).toBeDefined();
136+
}
137+
});
29138

30-
expect(stats).toBeNull();
139+
test("should not include cache costs when unavailable", () => {
140+
const stats = getModelStats("ollama:llama3.1");
141+
// Ollama models don't have cache costs in models.json
142+
if (stats) {
143+
expect(stats.cache_creation_input_token_cost).toBeUndefined();
144+
expect(stats.cache_read_input_token_cost).toBeUndefined();
145+
}
146+
});
31147
});
32148
});

src/utils/tokens/modelStats.ts

Lines changed: 73 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -19,48 +19,26 @@ interface RawModelData {
1919
}
2020

2121
/**
22-
* Extracts the model name from a Vercel AI SDK model string
23-
* @param modelString - Format: "provider:model-name" or just "model-name"
24-
* @returns The model name without the provider prefix
22+
* Validates raw model data has required fields
2523
*/
26-
function extractModelName(modelString: string): string {
27-
const parts = modelString.split(":");
28-
return parts.length > 1 ? parts[1] : parts[0];
24+
function isValidModelData(data: RawModelData): boolean {
25+
return (
26+
typeof data.max_input_tokens === "number" &&
27+
typeof data.input_cost_per_token === "number" &&
28+
typeof data.output_cost_per_token === "number"
29+
);
2930
}
3031

3132
/**
32-
* Gets model statistics for a given Vercel AI SDK model string
33-
* @param modelString - Format: "provider:model-name" (e.g., "anthropic:claude-opus-4-1")
34-
* @returns ModelStats or null if model not found
33+
* Extracts ModelStats from validated raw data
3534
*/
36-
export function getModelStats(modelString: string): ModelStats | null {
37-
const modelName = extractModelName(modelString);
38-
39-
// Check main models.json first
40-
let data = (modelsData as Record<string, RawModelData>)[modelName];
41-
42-
// Fall back to models-extra.ts if not found
43-
if (!data) {
44-
data = (modelsExtra as Record<string, RawModelData>)[modelName];
45-
}
46-
47-
if (!data) {
48-
return null;
49-
}
50-
51-
// Validate that we have required fields and correct types
52-
if (
53-
typeof data.max_input_tokens !== "number" ||
54-
typeof data.input_cost_per_token !== "number" ||
55-
typeof data.output_cost_per_token !== "number"
56-
) {
57-
return null;
58-
}
59-
35+
function extractModelStats(data: RawModelData): ModelStats {
36+
// Type assertions are safe here because isValidModelData() already validated these fields
37+
/* eslint-disable @typescript-eslint/non-nullable-type-assertion-style */
6038
return {
61-
max_input_tokens: data.max_input_tokens,
62-
input_cost_per_token: data.input_cost_per_token,
63-
output_cost_per_token: data.output_cost_per_token,
39+
max_input_tokens: data.max_input_tokens as number,
40+
input_cost_per_token: data.input_cost_per_token as number,
41+
output_cost_per_token: data.output_cost_per_token as number,
6442
cache_creation_input_token_cost:
6543
typeof data.cache_creation_input_token_cost === "number"
6644
? data.cache_creation_input_token_cost
@@ -70,4 +48,63 @@ export function getModelStats(modelString: string): ModelStats | null {
7048
? data.cache_read_input_token_cost
7149
: undefined,
7250
};
51+
/* eslint-enable @typescript-eslint/non-nullable-type-assertion-style */
52+
}
53+
54+
/**
55+
* Generates lookup keys for a model string with multiple naming patterns
56+
* Handles LiteLLM conventions like "ollama/model-cloud" and "provider/model"
57+
*/
58+
function generateLookupKeys(modelString: string): string[] {
59+
const colonIndex = modelString.indexOf(":");
60+
const provider = colonIndex !== -1 ? modelString.slice(0, colonIndex) : "";
61+
const modelName = colonIndex !== -1 ? modelString.slice(colonIndex + 1) : modelString;
62+
63+
const keys: string[] = [
64+
modelName, // Direct model name (e.g., "claude-opus-4-1")
65+
];
66+
67+
// Add provider-prefixed variants for Ollama and other providers
68+
if (provider) {
69+
keys.push(
70+
`${provider}/${modelName}`, // "ollama/gpt-oss:20b"
71+
`${provider}/${modelName}-cloud` // "ollama/gpt-oss:20b-cloud" (LiteLLM convention)
72+
);
73+
74+
// Fallback: strip size suffix for base model lookup
75+
// "ollama:gpt-oss:20b" → "ollama/gpt-oss"
76+
if (modelName.includes(":")) {
77+
const baseModel = modelName.split(":")[0];
78+
keys.push(`${provider}/${baseModel}`);
79+
}
80+
}
81+
82+
return keys;
83+
}
84+
85+
/**
86+
* Gets model statistics for a given Vercel AI SDK model string
87+
* @param modelString - Format: "provider:model-name" (e.g., "anthropic:claude-opus-4-1", "ollama:gpt-oss:20b")
88+
* @returns ModelStats or null if model not found
89+
*/
90+
export function getModelStats(modelString: string): ModelStats | null {
91+
const lookupKeys = generateLookupKeys(modelString);
92+
93+
// Try each lookup pattern in main models.json
94+
for (const key of lookupKeys) {
95+
const data = (modelsData as Record<string, RawModelData>)[key];
96+
if (data && isValidModelData(data)) {
97+
return extractModelStats(data);
98+
}
99+
}
100+
101+
// Fall back to models-extra.ts
102+
for (const key of lookupKeys) {
103+
const data = (modelsExtra as Record<string, RawModelData>)[key];
104+
if (data && isValidModelData(data)) {
105+
return extractModelStats(data);
106+
}
107+
}
108+
109+
return null;
73110
}

0 commit comments

Comments
 (0)