1- import { ollamaTokenGenerator } from '../modules/ollamaTokenGenerator' ;
2- import { countSymbol } from '../modules/text' ;
3- import { info } from '../modules/log' ;
1+ import { makeOllamaRequest } from "../modules/ollamaRequest" ;
2+
3+ type OllamaToken = {
4+ model : string ,
5+ response : string ,
6+ } ;
47
58export async function autocomplete ( args : {
69 endpoint : string ,
@@ -20,30 +23,27 @@ export async function autocomplete(args: {
2023 prompt : args . prefix ,
2124 suffix : args . suffix ,
2225 raw : true ,
26+ stream : false ,
2327 options : {
2428 num_predict : args . maxTokens ,
2529 temperature : args . temperature
2630 }
2731 } ;
2832
29- // Receiving tokens
30- let res = '' ;
31- let totalLines = 1 ;
32- for await ( let tokens of ollamaTokenGenerator ( args . endpoint + '/api/generate' , data , args . bearerToken ) ) {
33+ const res = await makeOllamaRequest ( args . endpoint + '/api/generate' , data , args . bearerToken ) ;
34+ try {
35+ const tokens = JSON . parse ( res ) as OllamaToken ;
3336 if ( args . canceled && args . canceled ( ) ) {
34- break ;
35- }
36-
37- res = res + tokens . response ;
38-
39- // Update total lines
40- totalLines += countSymbol ( tokens . response , '\n' ) ;
41- // Break if too many lines and on top level
42- if ( totalLines > args . maxLines ) {
43- info ( 'Too many lines, breaking.' ) ;
44- break ;
37+ return "" ;
4538 }
39+ const response = tokens . response ;
40+
41+ // take only args.maxLines lines from the response
42+ let lines = response . split ( '\n' ) ;
43+ lines = lines . slice ( 0 , args . maxLines ) ;
44+ return lines . join ( '\n' ) ;
45+ } catch ( e ) {
46+ console . warn ( 'Receive wrong line: ' + res ) ;
47+ return "" ;
4648 }
47-
48- return res ;
4949}
0 commit comments