@@ -404,29 +404,163 @@ pub fn len_utf8_bytes(c: char) -> uint {
404404 }
405405}
406406
407- # [ allow ( missing_doc ) ]
407+ /// Useful functions for Unicode characters.
408408pub trait Char {
409+ /// Returns whether the specified character is considered a Unicode
410+ /// alphabetic code point.
409411 fn is_alphabetic ( & self ) -> bool ;
412+
413+ /// Returns whether the specified character satisfies the 'XID_Start'
414+ /// Unicode property.
415+ ///
416+ /// 'XID_Start' is a Unicode Derived Property specified in
417+ /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
418+ /// mostly similar to 'ID_Start' but modified for closure under NFKx.
410419 fn is_XID_start ( & self ) -> bool ;
420+
421+ /// Returns whether the specified `char` satisfies the 'XID_Continue'
422+ /// Unicode property.
423+ ///
424+ /// 'XID_Continue' is a Unicode Derived Property specified in
425+ /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
426+ /// mostly similar to 'ID_Continue' but modified for closure under NFKx.
411427 fn is_XID_continue ( & self ) -> bool ;
428+
429+
430+ /// Indicates whether a character is in lowercase.
431+ ///
432+ /// This is defined according to the terms of the Unicode Derived Core
433+ /// Property `Lowercase`.
412434 fn is_lowercase ( & self ) -> bool ;
435+
436+ /// Indicates whether a character is in uppercase.
437+ ///
438+ /// This is defined according to the terms of the Unicode Derived Core
439+ /// Property `Uppercase`.
413440 fn is_uppercase ( & self ) -> bool ;
441+
442+ /// Indicates whether a character is whitespace.
443+ ///
444+ /// Whitespace is defined in terms of the Unicode Property `White_Space`.
414445 fn is_whitespace ( & self ) -> bool ;
446+
447+ /// Indicates whether a character is alphanumeric.
448+ ///
449+ /// Alphanumericness is defined in terms of the Unicode General Categories
450+ /// 'Nd', 'Nl', 'No' and the Derived Core Property 'Alphabetic'.
415451 fn is_alphanumeric ( & self ) -> bool ;
452+
453+ /// Indicates whether a character is a control code point.
454+ ///
455+ /// Control code points are defined in terms of the Unicode General
456+ /// Category `Cc`.
416457 fn is_control ( & self ) -> bool ;
458+
459+ /// Indicates whether the character is numeric (Nd, Nl, or No).
417460 fn is_digit ( & self ) -> bool ;
461+
462+ /// Checks if a `char` parses as a numeric digit in the given radix.
463+ ///
464+ /// Compared to `is_digit()`, this function only recognizes the characters
465+ /// `0-9`, `a-z` and `A-Z`.
466+ ///
467+ /// # Return value
468+ ///
469+ /// Returns `true` if the character is a valid digit under `radix`, and
470+ /// `false` otherwise.
471+ ///
472+ /// # Failure
473+ ///
474+ /// Fails if given a radix > 36.
418475 fn is_digit_radix ( & self , radix : uint ) -> bool ;
476+
477+ /// Converts a character to the corresponding digit.
478+ ///
479+ /// # Return value
480+ ///
481+ /// If the character is between '0' and '9', the corresponding value
482+ /// between 0 and 9. If it is 'a' or 'A', 10; if 'b' or 'B', 11, etc. Returns
483+ /// `None` if the character does not refer to a digit in the given radix.
484+ ///
485+ /// # Failure
486+ ///
487+ /// Fails if given a radix outside the range [0..36].
419488 fn to_digit ( & self , radix : uint ) -> Option < uint > ;
489+
490+ /// Converts a character to its lowercase equivalent.
491+ ///
492+ /// The case-folding performed is the common or simple mapping. See
493+ /// `to_uppercase()` for references and more information.
494+ ///
495+ /// # Return value
496+ ///
497+ /// Returns the lowercase equivalent of the character, or the character
498+ /// itself if no conversion is possible.
420499 fn to_lowercase ( & self ) -> char ;
500+
501+ /// Converts a character to its uppercase equivalent.
502+ ///
503+ /// The case-folding performed is the common or simple mapping: it maps
504+ /// one Unicode code point (one character in Rust) to its uppercase
505+ /// equivalent according to the Unicode database [1]. The additional
506+ /// `SpecialCasing.txt` is not considered here, as it expands to multiple
507+ /// code points in some cases.
508+ ///
509+ /// A full reference can be found here [2].
510+ ///
511+ /// # Return value
512+ ///
513+ /// Returns the uppercase equivalent of the character, or the character
514+ /// itself if no conversion was made.
515+ ///
516+ /// [1]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
517+ ///
518+ /// [2]: http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf#G33992
421519 fn to_uppercase ( & self ) -> char ;
520+
521+ /// Converts a number to the character representing it.
522+ ///
523+ /// # Return value
524+ ///
525+ /// Returns `Some(char)` if `num` represents one digit under `radix`,
526+ /// using one character of `0-9` or `a-z`, or `None` if it doesn't.
527+ ///
528+ /// # Failure
529+ ///
530+ /// Fails if given a radix > 36.
422531 fn from_digit ( num : uint , radix : uint ) -> Option < char > ;
532+
533+ /// Returns the hexadecimal Unicode escape of a character.
534+ ///
535+ /// The rules are as follows:
536+ ///
537+ /// * Characters in [0,0xff] get 2-digit escapes: `\\xNN`
538+ /// * Characters in [0x100,0xffff] get 4-digit escapes: `\\uNNNN`.
538+ /// * Characters at or above 0x10000 get 8-digit escapes: `\\UNNNNNNNN`.
423540 fn escape_unicode ( & self , f: |char|) ;
541+
542+ /// Returns a 'default' ASCII and C++11-like literal escape of a
543+ /// character.
544+ ///
545+ /// The default is chosen with a bias toward producing literals that are
546+ /// legal in a variety of languages, including C++11 and similar C-family
547+ /// languages. The exact rules are:
548+ ///
549+ /// * Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively.
550+ /// * Single-quote, double-quote and backslash chars are backslash-
551+ /// escaped.
552+ /// * Any other chars in the range [0x20,0x7e] are not escaped.
553+ /// * Any other chars are given hex unicode escapes; see `escape_unicode`.
424554 fn escape_default ( & self , f: |char|) ;
555+
556+ /// Returns the number of bytes this character would need if encoded in
557+ /// UTF-8.
425558 fn len_utf8_bytes ( & self ) -> uint ;
426559
427- /// Encodes this `char` as utf -8 into the provided byte- buffer
560+ /// Encodes this character as UTF-8 into the provided byte buffer.
428561 ///
429- /// The buffer must be at least 4 bytes long or a runtime failure will occur.
562+ /// The buffer must be at least 4 bytes long or a runtime failure will
563+ /// occur.
430564 ///
431565 /// This will then return the number of bytes written to the slice.
432566 fn encode_utf8 ( & self , dst : & mut [ u8 ] ) -> uint ;
0 commit comments