1- import { describe , expect , it , vi } from 'vitest' ;
1+ import { describe , expect , it } from 'vitest' ;
22import {
3- CHAR_TO_TOKEN_RATIO ,
43 CHAR_TO_TOKEN_RATIO_ZH ,
5- MAX_CHUNK_SIZE_TOKENS ,
6- MAX_INPUT_TOKENS ,
7- MAX_OUTPUT_TOKENS ,
4+ MAX_OUTPUT_TOKENS_CHAT ,
5+ MAX_OUTPUT_TOKENS_REASONER ,
86 estimateTokens ,
7+ getMaxOutputTokens ,
98 needsChunking ,
109 splitIntoChunks ,
1110} from '../../src/chunk' ;
1211
1312describe ( 'chunk' , ( ) => {
// Suite added by this change: checks that getMaxOutputTokens returns the
// model-specific limit constant for each model name used below
// ('deepseek-chat' -> MAX_OUTPUT_TOKENS_CHAT,
//  'deepseek-reasoner' -> MAX_OUTPUT_TOKENS_REASONER).
// NOTE(review): only these two model names are exercised; the result for any
// other or missing model name is not covered here.
13+ describe ( 'getMaxOutputTokens' , ( ) => {
14+ it ( 'should return correct token limits for deepseek-chat' , ( ) => {
15+ expect ( getMaxOutputTokens ( 'deepseek-chat' ) ) . toBe ( MAX_OUTPUT_TOKENS_CHAT ) ;
16+ } ) ;
17+
18+ it ( 'should return correct token limits for deepseek-reasoner' , ( ) => {
19+ expect ( getMaxOutputTokens ( 'deepseek-reasoner' ) ) . toBe (
20+ MAX_OUTPUT_TOKENS_REASONER ,
21+ ) ;
22+ } ) ;
23+ } ) ;
24+
1425 describe ( 'estimateTokens' , ( ) => {
1526 it ( 'should estimate tokens based on content length' , ( ) => {
1627 // Create test strings of different lengths
@@ -34,23 +45,41 @@ describe('chunk', () => {
3445 } ) ;
3546
// needsChunking suite. Lines marked '-' are the previous single-limit tests;
// lines marked '+' replace them with per-model tests that pass the model name
// as the second argument to needsChunking.
// Each model gets two cases built from a run of 'a' characters:
//   - "exceeding": ceil(limit / CHAR_TO_TOKEN_RATIO_ZH) + 1000 characters -> true
//   - "within":    ceil(limit / CHAR_TO_TOKEN_RATIO_ZH / 2) characters    -> false
// where limit is MAX_OUTPUT_TOKENS_CHAT or MAX_OUTPUT_TOKENS_REASONER.
// NOTE(review): the exact boundary length is not exercised, only values well
// above and well below it.
// NOTE(review): assumes the token estimate for ASCII 'a' input is governed by
// CHAR_TO_TOKEN_RATIO_ZH; that logic lives in src/chunk and is not visible
// here -- confirm.
3647 describe ( 'needsChunking' , ( ) => {
37- it ( 'should return true for content exceeding MAX_OUTPUT_TOKENS' , ( ) => {
38- // Create a string that would exceed the MAX_OUTPUT_TOKENS
39- // MAX_OUTPUT_TOKENS / CHAR_TO_TOKEN_RATIO_ZH gives us the number of characters needed
48+ it ( 'should return true for content exceeding MAX_OUTPUT_TOKENS for deepseek-chat' , ( ) => {
49+ // Create a string that would exceed the MAX_OUTPUT_TOKENS_CHAT
50+ // MAX_OUTPUT_TOKENS_CHAT / CHAR_TO_TOKEN_RATIO_ZH gives us the number of characters needed
51+ const exceedMaxTokens = 'a' . repeat (
52+ Math . ceil ( MAX_OUTPUT_TOKENS_CHAT / CHAR_TO_TOKEN_RATIO_ZH ) + 1000 ,
53+ ) ;
54+
55+ expect ( needsChunking ( exceedMaxTokens , 'deepseek-chat' ) ) . toBe ( true ) ;
56+ } ) ;
57+
58+ it ( 'should return false for content within MAX_OUTPUT_TOKENS for deepseek-chat' , ( ) => {
59+ // Create a string that would be below the MAX_OUTPUT_TOKENS_CHAT
60+ const withinMaxTokens = 'a' . repeat (
61+ Math . ceil ( MAX_OUTPUT_TOKENS_CHAT / CHAR_TO_TOKEN_RATIO_ZH / 2 ) ,
62+ ) ;
63+
64+ expect ( needsChunking ( withinMaxTokens , 'deepseek-chat' ) ) . toBe ( false ) ;
65+ } ) ;
66+
67+ it ( 'should return true for content exceeding MAX_OUTPUT_TOKENS for deepseek-reasoner' , ( ) => {
68+ // Create a string that would exceed the MAX_OUTPUT_TOKENS_REASONER
4069 const exceedMaxTokens = 'a' . repeat (
41- Math . ceil ( MAX_OUTPUT_TOKENS / CHAR_TO_TOKEN_RATIO_ZH ) + 1000 ,
70+ Math . ceil ( MAX_OUTPUT_TOKENS_REASONER / CHAR_TO_TOKEN_RATIO_ZH ) + 1000 ,
4271 ) ;
4372
44- expect ( needsChunking ( exceedMaxTokens ) ) . toBe ( true ) ;
73+ expect ( needsChunking ( exceedMaxTokens , 'deepseek-reasoner' ) ) . toBe ( true ) ;
4574 } ) ;
4675
47- it ( 'should return false for content within MAX_OUTPUT_TOKENS' , ( ) => {
48- // Create a string that would be below the MAX_OUTPUT_TOKENS
// NOTE(review): the title string on the next '+' line ends with a stray
// trailing space before the closing quote ('...deepseek-reasoner ').
76+ it ( 'should return false for content within MAX_OUTPUT_TOKENS for deepseek-reasoner ' , ( ) => {
77+ // Create a string that would be below the MAX_OUTPUT_TOKENS_REASONER
4978 const withinMaxTokens = 'a' . repeat (
50- Math . ceil ( MAX_OUTPUT_TOKENS / CHAR_TO_TOKEN_RATIO_ZH / 2 ) ,
79+ Math . ceil ( MAX_OUTPUT_TOKENS_REASONER / CHAR_TO_TOKEN_RATIO_ZH / 2 ) ,
5180 ) ;
5281
53- expect ( needsChunking ( withinMaxTokens ) ) . toBe ( false ) ;
82+ expect ( needsChunking ( withinMaxTokens , 'deepseek-reasoner' ) ) . toBe ( false ) ;
5483 } ) ;
5584 } ) ;
5685
@@ -72,7 +101,7 @@ Content for section 2.
72101
73102More content.` ;
74103
75- const chunks = splitIntoChunks ( content ) ;
104+ const chunks = splitIntoChunks ( content , 'deepseek-chat' ) ;
76105
77106 // The current implementation doesn't split by markdown headings as expected
78107 // so we're testing the actual behavior
@@ -87,10 +116,10 @@ More content.`;
87116 it ( 'should handle large sections with the current implementation' , ( ) => {
88117 // Create a very large section without headings
89118 const largeSection = 'a' . repeat (
90- Math . ceil ( ( MAX_OUTPUT_TOKENS / CHAR_TO_TOKEN_RATIO_ZH ) * 3 ) ,
119+ Math . ceil ( ( MAX_OUTPUT_TOKENS_CHAT / CHAR_TO_TOKEN_RATIO_ZH ) * 3 ) ,
91120 ) ;
92121
93- const chunks = splitIntoChunks ( largeSection ) ;
122+ const chunks = splitIntoChunks ( largeSection , 'deepseek-chat' ) ;
94123
95124 // The current implementation returns a single large chunk
96125 expect ( chunks . length ) . toBeGreaterThanOrEqual ( 1 ) ;
@@ -100,7 +129,7 @@ More content.`;
100129 } ) ;
101130
// Edge case: splitting an empty string is expected to produce an empty array
// (no chunks). The '+' line supplies 'deepseek-chat' as the second argument;
// the '-' line is the previous single-argument call.
102131 it ( 'should handle empty content' , ( ) => {
103- const chunks = splitIntoChunks ( '' ) ;
132+ const chunks = splitIntoChunks ( '' , 'deepseek-chat' ) ;
104133 expect ( chunks ) . toEqual ( [ ] ) ;
105134 } ) ;
106135 } ) ;
0 commit comments