104104 reason = "use the crates.io `getopts` library instead" ) ) ]
105105
106106#[ cfg( test) ] #[ macro_use] extern crate log;
107+ extern crate unicode_width;
108+
107109
108110use self :: Name :: * ;
109111use self :: HasArg :: * ;
110112use self :: Occur :: * ;
111113use self :: Fail :: * ;
112114use self :: Optval :: * ;
113- use self :: SplitWithinState :: * ;
114- use self :: Whitespace :: * ;
115- use self :: LengthLimit :: * ;
116115
117116use std:: error:: Error ;
118117use std:: ffi:: OsStr ;
119118use std:: fmt;
120119use std:: iter:: { repeat, IntoIterator } ;
121120use std:: result;
122121
122+ use unicode_width:: UnicodeWidthStr ;
123+
123124/// A description of the options that a program can handle.
124125pub struct Options {
125126 grps : Vec < OptGroup > ,
@@ -479,7 +480,7 @@ impl Options {
479480 let mut row = " " . to_string ( ) ;
480481
481482 // short option
482- match short_name. len ( ) {
483+ match short_name. width ( ) {
483484 0 => {
484485 if any_short {
485486 row. push_str ( " " ) ;
@@ -488,26 +489,23 @@ impl Options {
488489 1 => {
489490 row. push ( '-' ) ;
490491 row. push_str ( & short_name) ;
491- if long_name. len ( ) > 0 {
492+ if long_name. width ( ) > 0 {
492493 row. push_str ( ", " ) ;
493494 } else {
494495 // Only a single space here, so that any
495496 // argument is printed in the correct spot.
496497 row. push ( ' ' ) ;
497498 }
498499 }
500+ // FIXME: refer issue #7.
499501 _ => panic ! ( "the short name should only be 1 ascii char long" ) ,
500502 }
501503
502504 // long option
503- match long_name. len ( ) {
505+ match long_name. width ( ) {
504506 0 => { }
505507 _ => {
506- if self . long_only {
507- row. push ( '-' ) ;
508- } else {
509- row. push_str ( "--" ) ;
510- }
508+ row. push_str ( if self . long_only { "-" } else { "--" } ) ;
511509 row. push_str ( & long_name) ;
512510 row. push ( ' ' ) ;
513511 }
@@ -524,9 +522,7 @@ impl Options {
524522 }
525523 }
526524
527- // FIXME: #5516 should be graphemes not codepoints
528- // here we just need to indent the start of the description
529- let rowlen = row. chars ( ) . count ( ) ;
525+ let rowlen = row. width ( ) ;
530526 if rowlen < 24 {
531527 for _ in 0 .. 24 - rowlen {
532528 row. push ( ' ' ) ;
@@ -535,25 +531,7 @@ impl Options {
535531 row. push_str ( & desc_sep)
536532 }
537533
538- // Normalize desc to contain words separated by one space character
539- let mut desc_normalized_whitespace = String :: new ( ) ;
540- for word in desc. split ( |c : char | c. is_whitespace ( ) )
541- . filter ( |s| !s. is_empty ( ) ) {
542- desc_normalized_whitespace. push_str ( word) ;
543- desc_normalized_whitespace. push ( ' ' ) ;
544- }
545-
546- // FIXME: #5516 should be graphemes not codepoints
547- let mut desc_rows = Vec :: new ( ) ;
548- each_split_within ( & desc_normalized_whitespace,
549- 54 ,
550- |substr| {
551- desc_rows. push ( substr. to_string ( ) ) ;
552- true
553- } ) ;
554-
555- // FIXME: #5516 should be graphemes not codepoints
556- // wrapped description
534+ let desc_rows = each_split_within ( & desc, 54 ) ;
557535 row. push_str ( & desc_rows. join ( & desc_sep) ) ;
558536
559537 row
@@ -923,118 +901,59 @@ fn format_option(opt: &OptGroup) -> String {
923901 line
924902}
925903
926- #[ derive( Clone , Copy ) ]
927- enum SplitWithinState {
928- A , // leading whitespace, initial state
929- B , // words
930- C , // internal and trailing whitespace
931- }
932-
933- #[ derive( Clone , Copy ) ]
934- enum Whitespace {
935- Ws , // current char is whitespace
936- Cr // current char is not whitespace
937- }
938-
939- #[ derive( Clone , Copy ) ]
940- enum LengthLimit {
941- UnderLim , // current char makes current substring still fit in limit
942- OverLim // current char makes current substring no longer fit in limit
943- }
944-
945-
946904/// Splits a string into substrings with possibly internal whitespace,
947905/// each of them at most `lim` bytes long, if possible. The substrings
948906/// have leading and trailing whitespace removed, and are only cut at
949907/// whitespace boundaries.
950- ///
951- /// Note: Function was moved here from `std::str` because this module is the only place that
952- /// uses it, and because it was too specific for a general string function.
953- fn each_split_within < ' a , F > ( ss : & ' a str , lim : usize , mut it : F )
954- -> bool where F : FnMut ( & ' a str ) -> bool {
955- // Just for fun, let's write this as a state machine:
956-
957- let mut slice_start = 0 ;
958- let mut last_start = 0 ;
959- let mut last_end = 0 ;
960- let mut state = A ;
961- let mut fake_i = ss. len ( ) ;
962- let mut lim = lim;
963-
964- let mut cont = true ;
965-
966- // if the limit is larger than the string, lower it to save cycles
967- if lim >= fake_i {
968- lim = fake_i;
969- }
970-
971- let mut machine = |cont : & mut bool , state : & mut SplitWithinState , ( i, c) : ( usize , char ) | {
972- let whitespace = if c. is_whitespace ( ) { Ws } else { Cr } ;
973- let limit = if ( i - slice_start + 1 ) <= lim { UnderLim } else { OverLim } ;
974-
975- * state = match ( * state, whitespace, limit) {
976- ( A , Ws , _) => { A }
977- ( A , Cr , _) => { slice_start = i; last_start = i; B }
978-
979- ( B , Cr , UnderLim ) => { B }
980- ( B , Cr , OverLim ) if ( i - last_start + 1 ) > lim => {
981- // A single word has gone over the limit. In this
982- // case we just accept that the word will be too long.
983- B
984- }
985- ( B , Cr , OverLim ) => {
986- * cont = it ( & ss[ slice_start..last_end] ) ;
987- slice_start = last_start;
988- B
989- }
990- ( B , Ws , UnderLim ) => {
991- last_end = i;
992- C
993- }
994- ( B , Ws , OverLim ) => {
995- last_end = i;
996- * cont = it ( & ss[ slice_start..last_end] ) ;
997- A
998- }
999-
1000- ( C , Cr , UnderLim ) => {
1001- last_start = i;
1002- B
908+ fn each_split_within ( desc : & str , lim : usize ) -> Vec < String > {
909+ let mut rows = Vec :: new ( ) ;
910+ for line in desc. trim ( ) . lines ( ) {
911+ let line_chars = line. chars ( ) . chain ( Some ( ' ' ) ) ;
912+ let words = line_chars. fold ( ( Vec :: new ( ) , 0 , 0 ) , |( mut words, a, z) , c | {
913+ let idx = z + c. len_utf8 ( ) ; // Get the current byte offset
914+
915+ // If the char is whitespace, advance the word start and maybe push a word
916+ if c. is_whitespace ( ) {
917+ if a != z {
918+ words. push ( & line[ a..z] ) ;
919+ }
920+ ( words, idx, idx)
1003921 }
1004- ( C , Cr , OverLim ) => {
1005- * cont = it ( & ss[ slice_start..last_end] ) ;
1006- slice_start = i;
1007- last_start = i;
1008- last_end = i;
1009- B
922+ // If the char is not whitespace, continue, retaining the current
923+ else {
924+ ( words, a, idx)
1010925 }
1011- ( C , Ws , OverLim ) => {
1012- * cont = it ( & ss[ slice_start..last_end] ) ;
1013- A
926+ } ) . 0 ;
927+
928+ let mut row = String :: new ( ) ;
929+ for word in words. iter ( ) {
930+ let sep = if row. len ( ) > 0 { Some ( " " ) } else { None } ;
931+ let width = row. width ( )
932+ + word. width ( )
933+ + sep. map ( UnicodeWidthStr :: width) . unwrap_or ( 0 ) ;
934+
935+ if width <= lim {
936+ if let Some ( sep) = sep { row. push_str ( sep) }
937+ row. push_str ( word) ;
938+ continue
1014939 }
1015- ( C , Ws , UnderLim ) => {
1016- C
940+ if row. len ( ) > 0 {
941+ rows. push ( row. clone ( ) ) ;
942+ row. clear ( ) ;
1017943 }
1018- } ;
1019-
1020- * cont
1021- } ;
1022-
1023- ss. char_indices ( ) . all ( |x| machine ( & mut cont, & mut state, x) ) ;
1024-
1025- // Let the automaton 'run out' by supplying trailing whitespace
1026- while cont && match state { B | C => true , A => false } {
1027- machine ( & mut cont, & mut state, ( fake_i, ' ' ) ) ;
1028- fake_i += 1 ;
944+ row. push_str ( word) ;
945+ }
946+ if row. len ( ) > 0 {
947+ rows. push ( row) ;
948+ }
1029949 }
1030- return cont ;
950+ rows
1031951}
1032952
1033953#[ test]
1034954fn test_split_within ( ) {
1035955 fn t ( s : & str , i : usize , u : & [ String ] ) {
1036- let mut v = Vec :: new ( ) ;
1037- each_split_within ( s, i, |s| { v. push ( s. to_string ( ) ) ; true } ) ;
956+ let v = each_split_within ( & ( s. to_string ( ) ) , i) ;
1038957 assert ! ( v. iter( ) . zip( u. iter( ) ) . all( |( a, b) | a == b) ) ;
1039958 }
1040959 t ( "" , 0 , & [ ] ) ;
@@ -1046,7 +965,9 @@ fn test_split_within() {
1046965 "Little lamb" . to_string ( )
1047966 ] ) ;
1048967 t ( "\n Mary had a little lamb\n Little lamb\n " , :: std:: usize:: MAX ,
1049- & [ "Mary had a little lamb\n Little lamb" . to_string ( ) ] ) ;
968+ & [ "Mary had a little lamb" . to_string ( ) ,
969+ "Little lamb" . to_string ( )
970+ ] ) ;
1050971}
1051972
1052973#[ cfg( test) ]
@@ -1827,6 +1748,69 @@ Options:
18271748 assert ! ( usage == expected)
18281749 }
18291750
1751+ #[ test]
1752+ fn test_usage_description_newline_handling ( ) {
1753+ let mut opts = Options :: new ( ) ;
1754+ opts. optflag ( "k" , "k\u{2013} w\u{2013} " ,
1755+ "The word kiwi is normally spelled with two i's" ) ;
1756+ opts. optflag ( "a" , "apple" ,
1757+ "This description forces a new line.\n Here is a premature\n \
1758+ newline") ;
1759+
1760+ let expected =
1761+ "Usage: fruits
1762+
1763+ Options:
1764+ -k, --k–w– The word kiwi is normally spelled with two i's
1765+ -a, --apple This description forces a new line.
1766+ Here is a premature
1767+ newline
1768+ " ;
1769+
1770+ let usage = opts. usage ( "Usage: fruits" ) ;
1771+
1772+ debug ! ( "expected: <<{}>>" , expected) ;
1773+ debug ! ( "generated: <<{}>>" , usage) ;
1774+ assert ! ( usage == expected)
1775+ }
1776+
1777+ #[ test]
1778+ fn test_usage_multiwidth ( ) {
1779+ let mut opts = Options :: new ( ) ;
1780+ opts. optflag ( "a" , "apple" , "apple description" ) ;
1781+ opts. optflag ( "b" , "banana\u{00AB} " , "banana description" ) ;
1782+ opts. optflag ( "c" , "brûlée" , "brûlée quite long description" ) ;
1783+ opts. optflag ( "k" , "kiwi\u{20AC} " , "kiwi description" ) ;
1784+ opts. optflag ( "o" , "orange\u{2039} " , "orange description" ) ;
1785+ opts. optflag ( "r" , "raspberry-but-making-this-option-way-too-long" ,
1786+ "raspberry description is also quite long indeed longer than \
1787+ every other piece of text we might encounter here and thus will \
1788+ be automatically broken up"
1789+ ) ;
1790+
1791+ let expected =
1792+ "Usage: fruits
1793+
1794+ Options:
1795+ -a, --apple apple description
1796+ -b, --banana« banana description
1797+ -c, --brûlée brûlée quite long description
1798+ -k, --kiwi€ kiwi description
1799+ -o, --orange‹ orange description
1800+ -r, --raspberry-but-making-this-option-way-too-long
1801+ raspberry description is also quite long indeed longer
1802+ than every other piece of text we might encounter here
1803+ and thus will be automatically broken up
1804+ " ;
1805+
1806+ let usage = opts. usage ( "Usage: fruits" ) ;
1807+
1808+ debug ! ( "expected: <<{}>>" , expected) ;
1809+ debug ! ( "generated: <<{}>>" , usage) ;
1810+ assert ! ( usage == expected)
1811+ }
1812+
1813+
18301814 #[ test]
18311815 fn test_usage_short_only ( ) {
18321816 let mut opts = Options :: new ( ) ;
0 commit comments