@@ -409,3 +409,92 @@ fn chapter_settings_priority() {
         );
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_tokenize_basic() {
+        assert_eq!(tokenize("hello world"), vec!["hello", "world"]);
+    }
+
+    #[test]
+    fn test_tokenize_with_hyphens() {
+        assert_eq!(
+            tokenize("hello-world test-case"),
+            vec!["hello", "world", "test", "case"]
+        );
+    }
+
+    #[test]
+    fn test_tokenize_mixed_whitespace() {
+        assert_eq!(
+            tokenize("hello\tworld\ntest\r\ncase"),
+            vec!["hello", "world", "test", "case"]
+        );
+    }
+
+    #[test]
+    fn test_tokenize_empty_string() {
+        assert_eq!(tokenize(""), Vec::<String>::new());
+    }
+
+    #[test]
+    fn test_tokenize_only_whitespace() {
+        assert_eq!(tokenize(" \t\n "), Vec::<String>::new());
+    }
+
+    #[test]
+    fn test_tokenize_case_normalization() {
+        assert_eq!(tokenize("Hello WORLD Test"), vec!["hello", "world", "test"]);
+    }
+
+    #[test]
+    fn test_tokenize_trim_whitespace() {
+        assert_eq!(tokenize(" hello world "), vec!["hello", "world"]);
+    }
+
+    #[test]
+    fn test_tokenize_long_words_filtered() {
+        let long_word = "a".repeat(MAX_WORD_LENGTH_TO_INDEX + 1);
+        let short_word = "a".repeat(MAX_WORD_LENGTH_TO_INDEX);
+        let input = format!("{} hello {}", long_word, short_word);
+        assert_eq!(tokenize(&input), vec!["hello", &short_word]);
+    }
+
+    #[test]
+    fn test_tokenize_max_length_word() {
+        let max_word = "a".repeat(MAX_WORD_LENGTH_TO_INDEX);
+        assert_eq!(tokenize(&max_word), vec![max_word]);
+    }
+
+    #[test]
+    fn test_tokenize_special_characters() {
+        assert_eq!(
+            tokenize("hello,world.test!case?"),
+            vec!["hello,world.test!case?"]
+        );
+    }
+
+    #[test]
+    fn test_tokenize_unicode() {
+        assert_eq!(
+            tokenize("café naïve résumé"),
+            vec!["café", "naïve", "résumé"]
+        );
+    }
+
+    #[test]
+    fn test_tokenize_unicode_rtl_hebrew() {
+        assert_eq!(tokenize("שלום עולם"), vec!["שלום", "עולם"]);
+    }
+
+    #[test]
+    fn test_tokenize_numbers() {
+        assert_eq!(
+            tokenize("test123 456-789 hello"),
+            vec!["test123", "456", "789", "hello"]
+        );
+    }
+}
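
For context, the added tests pin down `tokenize`'s contract: split on whitespace and hyphens, lowercase the result, drop empty tokens, keep other punctuation attached to its word, and skip words longer than `MAX_WORD_LENGTH_TO_INDEX`. A minimal sketch consistent with these tests (not part of this commit; the constant's value is hypothetical, since the real one comes from the parent module via `use super::*`):

```rust
// Hypothetical cutoff; the tests only require that some such constant exists.
const MAX_WORD_LENGTH_TO_INDEX: usize = 64;

fn tokenize(text: &str) -> Vec<String> {
    text.split(|c: char| c.is_whitespace() || c == '-') // whitespace and hyphens both separate words
        .filter(|word| !word.is_empty()) // runs of separators yield empty slices; drop them
        .map(str::to_lowercase) // Unicode-aware case folding ("café" and "שלום" pass through)
        .filter(|word| word.len() <= MAX_WORD_LENGTH_TO_INDEX) // skip over-long words
        .collect()
}
```

The tests do not pin down whether the length cutoff counts bytes or chars, or whether it applies before or after lowercasing; they only exercise ASCII `"a"` repetitions, so this sketch checks byte length after lowercasing.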