@@ -4,94 +4,49 @@ extern crate unicode_segmentation;
44
55use bencher:: Bencher ;
66use unicode_segmentation:: UnicodeSegmentation ;
7+ use std:: fs;
78
8- const TEXT_ARABIC : & str = include_str ! ( "texts/arabic.txt" ) ;
9- const TEXT_ENGLISH : & str = include_str ! ( "texts/english.txt" ) ;
10- const TEXT_HINDI : & str = include_str ! ( "texts/hindi.txt" ) ;
11- const TEXT_JAPANESE : & str = include_str ! ( "texts/japanese.txt" ) ;
12- const TEXT_KOREAN : & str = include_str ! ( "texts/korean.txt" ) ;
13- const TEXT_MANDARIN : & str = include_str ! ( "texts/mandarin.txt" ) ;
14- const TEXT_RUSSIAN : & str = include_str ! ( "texts/russian.txt" ) ;
15- const TEXT_SOURCE_CODE : & str = include_str ! ( "texts/source_code.txt" ) ;
16-
17- fn graphemes_arabic ( bench : & mut Bencher ) {
9+ fn graphemes ( bench : & mut Bencher , path : & str ) {
10+ let text = fs:: read_to_string ( path) . unwrap ( ) ;
1811 bench. iter ( || {
19- for g in UnicodeSegmentation :: graphemes ( TEXT_ARABIC , true ) {
12+ for g in UnicodeSegmentation :: graphemes ( & * text , true ) {
2013 bencher:: black_box ( g) ;
2114 }
2215 } ) ;
2316
24- bench. bytes = TEXT_ARABIC . len ( ) as u64 ;
17+ bench. bytes = text . len ( ) as u64 ;
2518}
2619
27- fn graphemes_english ( bench : & mut Bencher ) {
28- bench. iter ( || {
29- for g in UnicodeSegmentation :: graphemes ( TEXT_ENGLISH , true ) {
30- bencher:: black_box ( g) ;
31- }
32- } ) ;
20+ fn graphemes_arabic ( bench : & mut Bencher ) {
21+ graphemes ( bench, "benches/texts/arabic.txt" ) ;
22+ }
3323
34- bench. bytes = TEXT_ENGLISH . len ( ) as u64 ;
24+ fn graphemes_english ( bench : & mut Bencher ) {
25+ graphemes ( bench, "benches/texts/english.txt" ) ;
3526}
3627
3728fn graphemes_hindi ( bench : & mut Bencher ) {
38- bench. iter ( || {
39- for g in UnicodeSegmentation :: graphemes ( TEXT_HINDI , true ) {
40- bencher:: black_box ( g) ;
41- }
42- } ) ;
43-
44- bench. bytes = TEXT_HINDI . len ( ) as u64 ;
29+ graphemes ( bench, "benches/texts/hindi.txt" ) ;
4530}
4631
4732fn graphemes_japanese ( bench : & mut Bencher ) {
48- bench. iter ( || {
49- for g in UnicodeSegmentation :: graphemes ( TEXT_JAPANESE , true ) {
50- bencher:: black_box ( g) ;
51- }
52- } ) ;
53-
54- bench. bytes = TEXT_JAPANESE . len ( ) as u64 ;
33+ graphemes ( bench, "benches/texts/japanese.txt" ) ;
5534}
5635
5736fn graphemes_korean ( bench : & mut Bencher ) {
58- bench. iter ( || {
59- for g in UnicodeSegmentation :: graphemes ( TEXT_KOREAN , true ) {
60- bencher:: black_box ( g) ;
61- }
62- } ) ;
63-
64- bench. bytes = TEXT_KOREAN . len ( ) as u64 ;
37+ graphemes ( bench, "benches/texts/korean.txt" ) ;
6538}
6639
6740fn graphemes_mandarin ( bench : & mut Bencher ) {
68- bench. iter ( || {
69- for g in UnicodeSegmentation :: graphemes ( TEXT_MANDARIN , true ) {
70- bencher:: black_box ( g) ;
71- }
72- } ) ;
73-
74- bench. bytes = TEXT_MANDARIN . len ( ) as u64 ;
41+ graphemes ( bench, "benches/texts/mandarin.txt" ) ;
7542}
7643
7744fn graphemes_russian ( bench : & mut Bencher ) {
78- bench. iter ( || {
79- for g in UnicodeSegmentation :: graphemes ( TEXT_RUSSIAN , true ) {
80- bencher:: black_box ( g) ;
81- }
82- } ) ;
83-
84- bench. bytes = TEXT_RUSSIAN . len ( ) as u64 ;
45+ graphemes ( bench, "benches/texts/russian.txt" ) ;
8546}
8647
8748fn graphemes_source_code ( bench : & mut Bencher ) {
88- bench. iter ( || {
89- for g in UnicodeSegmentation :: graphemes ( TEXT_SOURCE_CODE , true ) {
90- bencher:: black_box ( g) ;
91- }
92- } ) ;
93-
94- bench. bytes = TEXT_SOURCE_CODE . len ( ) as u64 ;
49+ graphemes ( bench, "benches/texts/source_code.txt" ) ;
9550}
9651
9752benchmark_group ! (
0 commit comments