@@ -511,6 +511,7 @@ struct RecursiveChunker<'s> {
511511 chunk_size : usize ,
512512 chunk_overlap : usize ,
513513 min_chunk_size : usize ,
514+ min_atom_chunk_size : usize ,
514515}
515516
516517impl < ' t , ' s : ' t > RecursiveChunker < ' s > {
@@ -526,7 +527,7 @@ impl<'t, 's: 't> RecursiveChunker<'s> {
526527 atom_collector. curr_level = iter_stack. len ( ) ;
527528
528529 if let Some ( current_chunk) = iter_stack. last_mut ( ) . unwrap ( ) . next ( ) {
529- if current_chunk. range . len ( ) <= self . min_chunk_size {
530+ if current_chunk. range . len ( ) <= self . min_atom_chunk_size {
530531 atom_collector. collect ( current_chunk. range ) ;
531532 } else {
532533 match current_chunk. kind {
@@ -819,19 +820,29 @@ impl SimpleFunctionExecutor for Executor {
819820 async fn evaluate ( & self , input : Vec < Value > ) -> Result < Value > {
820821 let full_text = self . args . text . value ( & input) ?. as_str ( ) ?;
821822 let chunk_size = self . args . chunk_size . value ( & input) ?. as_int64 ( ) ?;
822- let recursive_chunker = RecursiveChunker {
823- full_text,
824- chunk_size : chunk_size as usize ,
825- chunk_overlap : ( self . args . chunk_overlap . value ( & input) ?)
823+ let min_chunk_size = ( self . args . min_chunk_size . value ( & input) ?)
824+ . optional ( )
825+ . map ( |v| v. as_int64 ( ) )
826+ . transpose ( ) ?
827+ . unwrap_or ( chunk_size / 2 ) as usize ;
828+ let chunk_overlap = std:: cmp:: min (
829+ ( self . args . chunk_overlap . value ( & input) ?)
826830 . optional ( )
827831 . map ( |v| v. as_int64 ( ) )
828832 . transpose ( ) ?
829833 . unwrap_or ( 0 ) as usize ,
830- min_chunk_size : ( self . args . min_chunk_size . value ( & input) ?)
831- . optional ( )
832- . map ( |v| v. as_int64 ( ) )
833- . transpose ( ) ?
834- . unwrap_or ( chunk_size / 2 ) as usize ,
834+ min_chunk_size,
835+ ) ;
836+ let recursive_chunker = RecursiveChunker {
837+ full_text,
838+ chunk_size : chunk_size as usize ,
839+ chunk_overlap,
840+ min_chunk_size,
841+ min_atom_chunk_size : if chunk_overlap > 0 {
842+ chunk_overlap
843+ } else {
844+ min_chunk_size
845+ } ,
835846 } ;
836847
837848 let language = UniCase :: new (
0 commit comments