Commit 10085e4

feat: ability to pick custom model
1 parent f342aee · commit 10085e4

6 files changed: 129 additions & 51 deletions

package.json

Lines changed: 42 additions & 20 deletions
@@ -38,23 +38,9 @@
       "properties": {
         "inference.endpoint": {
           "type": "string",
-          "default": "http://127.0.0.1:11434/",
-          "description": "Ollama Server Endpoint"
-        },
-        "inference.maxLines": {
-          "type": "number",
-          "default": 16,
-          "description": "Max number of lines to be keep."
-        },
-        "inference.maxTokens": {
-          "type": "number",
-          "default": 256,
-          "description": "Max number of new tokens to be generated."
-        },
-        "inference.temperature": {
-          "type": "number",
-          "default": 0.2,
-          "description": "Temperature of the model. Increasing the temperature will make the model answer more creatively."
+          "default": "",
+          "description": "Ollama Server Endpoint. Empty for local instance.",
+          "order": 1
         },
         "inference.model": {
           "type": "string",
@@ -81,10 +67,46 @@
             "deepseek-coder:6.7b-base-fp16",
             "deepseek-coder:33b-base-q4_K_S",
             "deepseek-coder:33b-base-q4_K_M",
-            "deepseek-coder:33b-base-fp16"
+            "deepseek-coder:33b-base-fp16",
+            "custom"
           ],
           "default": "deepseek-coder:1.3b-base-q4_1",
-          "description": "Inference model to use"
+          "description": "Inference model to use",
+          "order": 2
+        },
+        "inference.temperature": {
+          "type": "number",
+          "default": 0.2,
+          "description": "Temperature of the model. Increasing the temperature will make the model answer more creatively.",
+          "order": 3
+        },
+        "inference.custom.model": {
+          "type": "string",
+          "default": "",
+          "description": "Custom model name",
+          "order": 4
+        },
+        "inference.custom.format": {
+          "type": "string",
+          "enum": [
+            "codellama",
+            "deepseek"
+          ],
+          "default": "codellama",
+          "description": "Custom model prompt format",
+          "order": 5
+        },
+        "inference.maxLines": {
+          "type": "number",
+          "default": 16,
+          "description": "Max number of lines to be keep.",
+          "order": 6
+        },
+        "inference.maxTokens": {
+          "type": "number",
+          "default": 256,
+          "description": "Max number of new tokens to be generated.",
+          "order": 7
         }
       }
     }
@@ -111,4 +133,4 @@
     "ts-jest": "^29.1.1",
     "typescript": "^5.2.2"
   }
-}
+}
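
The user-facing effect of this change: the inference.model enum gains a "custom" entry, and two new settings, inference.custom.model and inference.custom.format, only take effect when that entry is selected (the new "order" fields just sort the settings UI). A minimal sketch of the corresponding settings.json entries, written here as a TypeScript object literal; the model tag is a made-up example, not something the extension ships with:

// Hypothetical user settings for running an Ollama model that is not in the built-in list.
const exampleSettings = {
    "inference.model": "custom",
    "inference.custom.model": "my-finetuned-coder:13b-q4_K_M", // example tag; use a model already pulled into Ollama
    "inference.custom.format": "deepseek"                      // or "codellama", matching the model's prompt template
};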

src/config.ts

Lines changed: 50 additions & 0 deletions
@@ -0,0 +1,50 @@
+import vscode from 'vscode';
+
+class Config {
+
+    // Inference
+    get inference() {
+        let config = this.#config;
+
+        // Load endpoint
+        let endpoint = (config.get('endpoint') as string).trim();
+        if (endpoint.endsWith('/')) {
+            endpoint = endpoint.slice(0, endpoint.length - 1).trim();
+        }
+        if (endpoint === '') {
+            endpoint = 'http://127.0.0.1:11434';
+        }
+
+        // Load general paremeters
+        let maxLines = config.get('maxLines') as number;
+        let maxTokens = config.get('maxTokens') as number;
+        let temperature = config.get('temperature') as number;
+
+        // Load model
+        let modelName = config.get('model') as string;
+        let modelFormat: 'codellama' | 'deepseek' = 'codellama';
+        if (modelName === 'custom') {
+            modelName = config.get('custom.model') as string;
+            modelFormat = config.get('cutom.format') as 'codellama' | 'deepseek';
+        } else {
+            if (modelName.startsWith('deepseek-coder')) {
+                modelFormat = 'deepseek';
+            }
+        }
+
+        return {
+            endpoint,
+            maxLines,
+            maxTokens,
+            temperature,
+            modelName,
+            modelFormat
+        };
+    }
+
+    get #config() {
+        return vscode.workspace.getConfiguration('inference');
+    };
+}
+
+export const config = new Config();
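
The new Config singleton gathers what provider.ts previously read setting by setting: it strips a trailing slash from the endpoint and falls back to the local http://127.0.0.1:11434 instance when the setting is empty, and it resolves the model name/format pair from the inference.custom.* settings whenever inference.model is "custom" (note the format lookup reads 'cutom.format' as committed, which looks like a typo for 'custom.format'). A minimal usage sketch, assuming it runs inside the extension host where the vscode API is available:

import { config } from '../config'; // same import the provider uses

// Resolve the effective inference settings for one completion request.
const inference = config.inference;

// With the defaults contributed in package.json this yields:
//   endpoint    -> 'http://127.0.0.1:11434'        (empty setting falls back to the local instance)
//   modelName   -> 'deepseek-coder:1.3b-base-q4_1'
//   modelFormat -> 'deepseek'                      (derived from the model name prefix)
console.log(inference.endpoint, inference.modelName, inference.modelFormat);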

src/prompts/autocomplete.ts

Lines changed: 4 additions & 3 deletions
@@ -1,20 +1,21 @@
 import { ollamaTokenGenerator } from '../modules/ollamaTokenGenerator';
 import { countSymbol } from '../modules/text';
 import { info } from '../modules/log';
-import { adaptPrompt } from './adaptors/adaptPrompt';
+import { ModelFormat, adaptPrompt } from './processors/models';
 
 export async function autocomplete(args: {
     endpoint: string,
     model: string,
+    format: ModelFormat,
     prefix: string,
-    suffix: string | null,
+    suffix: string,
     maxLines: number,
     maxTokens: number,
     temperature: number,
     canceled?: () => boolean,
 }): Promise<string> {
 
-    let prompt = adaptPrompt({ prefix: args.prefix, suffix: args.suffix, model: args.model });
+    let prompt = adaptPrompt({ prefix: args.prefix, suffix: args.suffix, format: args.format });
 
     // Calculate arguments
     let data = {

src/prompts/preparePrompt.ts

Lines changed: 4 additions & 4 deletions
@@ -9,14 +9,14 @@ export async function preparePrompt(document: vscode.TextDocument, position: vsc
     let text = document.getText();
     let offset = document.offsetAt(position);
     let prefix = text.slice(0, offset);
-    let suffix: string | null = text.slice(offset);
+    let suffix: string = text.slice(offset);
 
     // Trim suffix
     // If suffix is too small it is safe to assume that it could be ignored which would allow us to use
     // more powerful completition instead of in middle one
-    if (suffix.length < 256) {
-        suffix = null;
-    }
+    // if (suffix.length < 256) {
+    //     suffix = null;
+    // }
 
     // Add filename and language to prefix
     // NOTE: Most networks don't have a concept of filenames and expected language, but we expect that some files in training set has something in title that

src/prompts/adaptors/adaptPrompt.ts renamed to src/prompts/processors/models.ts

Lines changed: 10 additions & 8 deletions
@@ -1,15 +1,17 @@
-export function adaptPrompt(args: { model: string, prefix: string, suffix: string | null }): { prompt: string, stop: string[] } {
+export type ModelFormat = 'codellama' | 'deepseek';
+
+export function adaptPrompt(args: { format: ModelFormat, prefix: string, suffix: string }): { prompt: string, stop: string[] } {
 
     // Common non FIM mode
-    if (!args.suffix) {
-        return {
-            prompt: args.prefix,
-            stop: [`<END>`]
-        };
-    }
+    // if (!args.suffix) {
+    //     return {
+    //         prompt: args.prefix,
+    //         stop: [`<END>`]
+    //     };
+    // }
 
     // Starcoder FIM
-    if (args.model.startsWith('deepseek-coder')) {
+    if (args.format === 'deepseek') {
         return {
             prompt: `<|fim▁begin|>${args.prefix}<|fim▁hole|>${args.suffix}<|fim▁end|>`,
             stop: [`<|fim▁begin|>`, `<|fim▁hole|>`, `<|fim▁end|>`, `<END>`]
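
adaptPrompt now keys off an explicit ModelFormat instead of sniffing the model name, which is what lets a custom model reuse either prompt template. A small sketch of the 'deepseek' branch shown above; the prefix and suffix are made-up values around an imaginary cursor position:

import { ModelFormat, adaptPrompt } from './processors/models'; // path as imported from src/prompts

const format: ModelFormat = 'deepseek';
const { prompt, stop } = adaptPrompt({
    format,
    prefix: 'function add(a: number, b: number) {\n    return ',
    suffix: ';\n}\n'
});

// prompt wraps the prefix and suffix in the <|fim▁begin|>/<|fim▁hole|>/<|fim▁end|> sentinels,
// and stop lists those sentinels plus `<END>` so generation halts before re-emitting them.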

src/prompts/provider.ts

Lines changed: 19 additions & 16 deletions
@@ -7,6 +7,7 @@ import { getFromPromptCache, setPromptToCache } from './promptCache';
 import { isNotNeeded, isSupported } from './filter';
 import { ollamaCheckModel } from '../modules/ollamaCheckModel';
 import { ollamaDownloadModel } from '../modules/ollamaDownloadModel';
+import { config } from '../config';
 
 export class PromptProvider implements vscode.InlineCompletionItemProvider {
 
@@ -62,22 +63,23 @@ export class PromptProvider implements vscode.InlineCompletionItemProvider {
         if (cached === undefined) {
 
             // Config
-            let config = vscode.workspace.getConfiguration('inference');
-            let endpoint = config.get('endpoint') as string;
-            let model = config.get('model') as string;
-            let maxLines = config.get('maxLines') as number;
-            let maxTokens = config.get('maxTokens') as number;
-            let temperature = config.get('temperature') as number;
-            if (endpoint.endsWith('/')) {
-                endpoint = endpoint.slice(0, endpoint.length - 1);
-            }
+            let inferenceConfig = config.inference;
+            // let config = vscode.workspace.getConfiguration('inference');
+            // let endpoint = config.get('endpoint') as string;
+            // let model = config.get('model') as string;
+            // let maxLines = config.get('maxLines') as number;
+            // let maxTokens = config.get('maxTokens') as number;
+            // let temperature = config.get('temperature') as number;
+            // if (endpoint.endsWith('/')) {
+            //     endpoint = endpoint.slice(0, endpoint.length - 1);
+            // }
 
             // Update status
             this.statusbar.text = `$(sync~spin) Llama Coder`;
             try {
 
                 // Check model exists
-                let modelExists = await ollamaCheckModel(endpoint, model);
+                let modelExists = await ollamaCheckModel(inferenceConfig.endpoint, inferenceConfig.modelName);
                 if (token.isCancellationRequested) {
                     info(`Canceled after AI completion.`);
                     return;
@@ -86,7 +88,7 @@ export class PromptProvider implements vscode.InlineCompletionItemProvider {
                 // Download model if not exists
                 if (!modelExists) {
                     this.statusbar.text = `$(sync~spin) Downloading`;
-                    await ollamaDownloadModel(endpoint, model);
+                    await ollamaDownloadModel(inferenceConfig.endpoint, inferenceConfig.modelName);
                     this.statusbar.text = `$(sync~spin) Llama Coder`;
                 }
                 if (token.isCancellationRequested) {
@@ -99,11 +101,12 @@ export class PromptProvider implements vscode.InlineCompletionItemProvider {
                 res = await autocomplete({
                     prefix: prepared.prefix,
                     suffix: prepared.suffix,
-                    endpoint: endpoint,
-                    model: model,
-                    maxLines: maxLines,
-                    maxTokens: maxTokens,
-                    temperature,
+                    endpoint: inferenceConfig.endpoint,
+                    model: inferenceConfig.modelName,
+                    format: inferenceConfig.modelFormat,
+                    maxLines: inferenceConfig.maxLines,
+                    maxTokens: inferenceConfig.maxTokens,
+                    temperature: inferenceConfig.temperature,
                     canceled: () => token.isCancellationRequested,
                 });
                 info(`AI completion completed: ${res}`);
