Add a b"xx" byte string literal of type &'static [u8].

SimonSapin · SimonSapin · commit d7e01b5809cd · 2014-06-17T23:43:18.000+02:00
diff --git a/src/libcore/str.rs b/src/libcore/str.rs
@@ -560,6 +560,8 @@ Section: Comparing strings
 
 // share the implementation of the lang-item vs. non-lang-item
 // eq_slice.
+/// NOTE: This function is (ab)used in rustc::middle::trans::_match
+/// to compare &[u8] byte slices that are not necessarily valid UTF-8.
 #[inline]
 fn eq_slice_(a: &str, b: &str) -> bool {
     #[allow(ctypes)]
@@ -572,6 +574,8 @@ fn eq_slice_(a: &str, b: &str) -> bool {
 }
 
 /// Bytewise slice equality
+/// NOTE: This function is (ab)used in rustc::middle::trans::_match
+/// to compare &[u8] byte slices that are not necessarily valid UTF-8.
 #[cfg(not(test))]
 #[lang="str_eq"]
 #[inline]
diff --git a/src/libregex_macros/lib.rs b/src/libregex_macros/lib.rs
@@ -182,7 +182,7 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
         #[allow(unused_variable)]
         fn run(&mut self, start: uint, end: uint) -> Vec<Option<uint>> {
             let mut matched = false;
-            let prefix_bytes: &[u8] = &$prefix_bytes;
+            let prefix_bytes: &[u8] = $prefix_bytes;
             let mut clist = &mut Threads::new(self.which);
             let mut nlist = &mut Threads::new(self.which);
 
diff --git a/src/librustc/middle/const_eval.rs b/src/librustc/middle/const_eval.rs
@@ -529,6 +529,7 @@ pub fn compare_const_vals(a: &const_val, b: &const_val) -> Option<int> {
         (&const_float(a), &const_float(b)) => compare_vals(a, b),
         (&const_str(ref a), &const_str(ref b)) => compare_vals(a, b),
         (&const_bool(a), &const_bool(b)) => compare_vals(a, b),
+        (&const_binary(ref a), &const_binary(ref b)) => compare_vals(a, b),
         _ => None
     }
 }
diff --git a/src/librustc/middle/trans/_match.rs b/src/librustc/middle/trans/_match.rs
@@ -1273,13 +1273,24 @@ fn compare_values<'a>(
                     val: bool_to_i1(result.bcx, result.val)
                 }
             }
-            _ => cx.sess().bug("only scalars and strings supported in compare_values"),
+            _ => cx.sess().bug("only strings supported in compare_values"),
         },
         ty::ty_rptr(_, mt) => match ty::get(mt.ty).sty {
             ty::ty_str => compare_str(cx, lhs, rhs, rhs_t),
-            _ => cx.sess().bug("only scalars and strings supported in compare_values"),
+            ty::ty_vec(mt, _) => match ty::get(mt.ty).sty {
+                ty::ty_uint(ast::TyU8) => {
+                    // NOTE: cast &[u8] to &str and abuse the str_eq lang item,
+                    // which calls memcmp().
+                    let t = ty::mk_str_slice(cx.tcx(), ty::ReStatic, ast::MutImmutable);
+                    let lhs = BitCast(cx, lhs, type_of::type_of(cx.ccx(), t).ptr_to());
+                    let rhs = BitCast(cx, rhs, type_of::type_of(cx.ccx(), t).ptr_to());
+                    compare_str(cx, lhs, rhs, rhs_t)
+                },
+                _ => cx.sess().bug("only byte strings supported in compare_values"),
+            },
+            _ => cx.sess().bug("only strings and byte strings supported in compare_values"),
         },
-        _ => cx.sess().bug("only scalars and strings supported in compare_values"),
+        _ => cx.sess().bug("only scalars, byte strings, and strings supported in compare_values"),
     }
 }
 
diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs
@@ -140,7 +140,8 @@ fn doit(sess: &parse::ParseSess, mut lexer: lexer::StringReader,
             }
 
             // text literals
-            t::LIT_BYTE(..) | t::LIT_CHAR(..) | t::LIT_STR(..) | t::LIT_STR_RAW(..) => "string",
+            t::LIT_BYTE(..) | t::LIT_BINARY(..) |
+                t::LIT_CHAR(..) | t::LIT_STR(..) | t::LIT_STR_RAW(..) => "string",
 
             // number literals
             t::LIT_INT(..) | t::LIT_UINT(..) | t::LIT_INT_UNSUFFIXED(..) |
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
@@ -654,7 +654,8 @@ impl<'a> StringReader<'a> {
             // Note: r as in r" or r#" is part of a raw string literal,
             // b as in b' is part of a byte literal.
             // They are not identifiers, and are handled further down.
-           ('r', Some('"')) | ('r', Some('#')) | ('b', Some('\'')) => false,
+           ('r', Some('"')) | ('r', Some('#')) |
+           ('b', Some('"')) | ('b', Some('\'')) => false,
            _ => true
         } {
             let start = self.last_pos;
@@ -859,62 +860,124 @@ impl<'a> StringReader<'a> {
           }
           'b' => {
             self.bump();
-            assert!(self.curr_is('\''), "Should have been a token::IDENT");
-            self.bump();
-            let start = self.last_pos;
-
-            // the eof will be picked up by the final `'` check below
-            let mut c2 = self.curr.unwrap_or('\x00');
-            self.bump();
+            return match self.curr {
+                Some('\'') => parse_byte(self),
+                Some('"') => parse_byte_string(self),
+                _ => unreachable!()  // Should have been a token::IDENT above.
+            };
 
-            match c2 {
-                '\\' => {
-                    // '\X' for some X must be a character constant:
-                    let escaped = self.curr;
-                    let escaped_pos = self.last_pos;
-                    self.bump();
-                    match escaped {
-                        None => {}
-                        Some(e) => {
-                            c2 = match e {
-                                'n' => '\n',
-                                'r' => '\r',
-                                't' => '\t',
-                                '\\' => '\\',
-                                '\'' => '\'',
-                                '"' => '"',
-                                '0' => '\x00',
-                                'x' => self.scan_numeric_escape(2u, '\''),
-                                c2 => {
-                                    self.err_span_char(escaped_pos, self.last_pos,
-                                                       "unknown byte escape", c2);
-                                    c2
+            fn parse_byte(self_: &mut StringReader) -> token::Token {
+                self_.bump();
+                let start = self_.last_pos;
+
+                // the eof will be picked up by the final `'` check below
+                let mut c2 = self_.curr.unwrap_or('\x00');
+                self_.bump();
+
+                match c2 {
+                    '\\' => {
+                        // '\X' for some X must be a byte constant:
+                        let escaped = self_.curr;
+                        let escaped_pos = self_.last_pos;
+                        self_.bump();
+                        match escaped {
+                            None => {}
+                            Some(e) => {
+                                c2 = match e {
+                                    'n' => '\n',
+                                    'r' => '\r',
+                                    't' => '\t',
+                                    '\\' => '\\',
+                                    '\'' => '\'',
+                                    '"' => '"',
+                                    '0' => '\x00',
+                                    'x' => self_.scan_numeric_escape(2u, '\''),
+                                    c2 => {
+                                        self_.err_span_char(
+                                            escaped_pos, self_.last_pos,
+                                            "unknown byte escape", c2);
+                                        c2
+                                    }
                                 }
                             }
                         }
                     }
+                    '\t' | '\n' | '\r' | '\'' => {
+                        self_.err_span_char( start, self_.last_pos,
+                            "byte constant must be escaped", c2);
+                    }
+                    _ => if c2 > '\x7F' {
+                        self_.err_span_char( start, self_.last_pos,
+                            "byte constant must be ASCII. \
+                             Use a \\xHH escape for a non-ASCII byte", c2);
+                    }
                 }
-                '\t' | '\n' | '\r' | '\'' => {
-                    self.err_span_char( start, self.last_pos,
-                        "byte constant must be escaped", c2);
-                }
-                _ if c2 > '\x7F' => {
-                    self.err_span_char( start, self.last_pos,
-                        "byte constant must be ASCII. \
-                         Use a \\xHH escape for a non-ASCII byte", c2);
+                if !self_.curr_is('\'') {
+                    // Byte offsetting here is okay because the two
+                    // characters before position `start` are an
+                    // ASCII 'b' and an ASCII single quote.
+                    self_.fatal_span_verbose(
+                        start - BytePos(2), self_.last_pos,
+                        "unterminated byte constant".to_string());
                 }
-                _ => {}
+                self_.bump(); // advance curr past token
+                return token::LIT_BYTE(c2 as u8);
             }
-            if !self.curr_is('\'') {
-                self.fatal_span_verbose(
-                                   // Byte offsetting here is okay because the
-                                   // character before position `start` are an
-                                   // ascii single quote and ascii 'b'.
-                                   start - BytePos(2), self.last_pos,
-                                   "unterminated byte constant".to_string());
+
+            fn parse_byte_string(self_: &mut StringReader) -> token::Token {
+                self_.bump();
+                let start = self_.last_pos;
+                let mut value = Vec::new();
+                while !self_.curr_is('"') {
+                    if self_.is_eof() {
+                        self_.fatal_span(start, self_.last_pos,
+                                         "unterminated double quote byte string");
+                    }
+
+                    let ch = self_.curr.unwrap();
+                    self_.bump();
+                    match ch {
+                      '\\' => {
+                        if self_.is_eof() {
+                            self_.fatal_span(start, self_.last_pos,
+                                             "unterminated double quote byte string");
+                        }
+
+                        let escaped = self_.curr.unwrap();
+                        let escaped_pos = self_.last_pos;
+                        self_.bump();
+                        match escaped {
+                          'n' => value.push('\n' as u8),
+                          'r' => value.push('\r' as u8),
+                          't' => value.push('\t' as u8),
+                          '\\' => value.push('\\' as u8),
+                          '\'' => value.push('\'' as u8),
+                          '"' => value.push('"' as u8),
+                          '\n' => self_.consume_whitespace(),
+                          '0' => value.push(0),
+                          'x' => {
+                            value.push(self_.scan_numeric_escape(2u, '"') as u8);
+                          }
+                          c2 => {
+                            self_.err_span_char(escaped_pos, self_.last_pos,
+                                                "unknown byte string escape", c2);
+                          }
+                        }
+                      }
+                      _ => {
+                        if ch <= '\x7F' {
+                            value.push(ch as u8)
+                        } else {
+                            self_.err_span_char(self_.last_pos, self_.last_pos,
+                                "byte string must be ASCII. \
+                                 Use a \\xHH escape for a non-ASCII byte", ch);
+                        }
+                      }
+                    }
+                }
+                self_.bump();
+                return token::LIT_BINARY(Rc::new(value));
             }
-            self.bump(); // advance curr past token
-            return token::LIT_BYTE(c2 as u8);
           }
           '"' => {
             let mut accum_str = String::new();
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
@@ -33,7 +33,7 @@ use ast::{ForeignItem, ForeignItemStatic, ForeignItemFn, ForeignMod};
 use ast::{Ident, NormalFn, Inherited, Item, Item_, ItemStatic};
 use ast::{ItemEnum, ItemFn, ItemForeignMod, ItemImpl};
 use ast::{ItemMac, ItemMod, ItemStruct, ItemTrait, ItemTy, Lit, Lit_};
-use ast::{LitBool, LitFloat, LitFloatUnsuffixed, LitInt, LitChar, LitByte};
+use ast::{LitBool, LitFloat, LitFloatUnsuffixed, LitInt, LitChar, LitByte, LitBinary};
 use ast::{LitIntUnsuffixed, LitNil, LitStr, LitUint, Local, LocalLet};
 use ast::{MutImmutable, MutMutable, Mac_, MacInvocTT, Matcher, MatchNonterminal};
 use ast::{MatchSeq, MatchTok, Method, MutTy, BiMul, Mutability};
@@ -1529,6 +1529,7 @@ impl<'a> Parser<'a> {
             token::LIT_STR_RAW(s, n) => {
                 LitStr(self.id_to_interned_str(s), ast::RawStr(n))
             }
+            token::LIT_BINARY(ref v) => LitBinary(v.clone()),
             token::LPAREN => { self.expect(&token::RPAREN); LitNil },
             _ => { self.unexpected_last(tok); }
         }
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
@@ -87,6 +87,7 @@ pub enum Token {
     LIT_FLOAT_UNSUFFIXED(ast::Ident),
     LIT_STR(ast::Ident),
     LIT_STR_RAW(ast::Ident, uint), /* raw str delimited by n hash symbols */
+    LIT_BINARY(Rc<Vec<u8>>),
 
     /* Name components */
     // an identifier contains an "is_mod_name" boolean,
@@ -231,17 +232,22 @@ pub fn to_str(t: &Token) -> String {
         body
       }
       LIT_STR(s) => {
-          (format!("\"{}\"", get_ident(s).get().escape_default())).to_string()
+          format!("\"{}\"", get_ident(s).get().escape_default())
       }
       LIT_STR_RAW(s, n) => {
-          (format!("r{delim}\"{string}\"{delim}",
-                  delim="#".repeat(n), string=get_ident(s))).to_string()
+        format!("r{delim}\"{string}\"{delim}",
+                 delim="#".repeat(n), string=get_ident(s))
+      }
+      LIT_BINARY(ref v) => {
+          format!(
+            "b\"{}\"",
+            v.iter().map(|&b| b as char).collect::<String>().escape_default())
       }
 
       /* Name components */
       IDENT(s, _) => get_ident(s).get().to_string(),
       LIFETIME(s) => {
-          (format!("{}", get_ident(s))).to_string()
+          format!("{}", get_ident(s))
       }
       UNDERSCORE => "_".to_string(),
 
@@ -291,6 +297,7 @@ pub fn can_begin_expr(t: &Token) -> bool {
       LIT_FLOAT_UNSUFFIXED(_) => true,
       LIT_STR(_) => true,
       LIT_STR_RAW(_, _) => true,
+      LIT_BINARY(_) => true,
       POUND => true,
       AT => true,
       NOT => true,
@@ -330,6 +337,7 @@ pub fn is_lit(t: &Token) -> bool {
       LIT_FLOAT_UNSUFFIXED(_) => true,
       LIT_STR(_) => true,
       LIT_STR_RAW(_, _) => true,
+      LIT_BINARY(_) => true,
       _ => false
     }
 }
diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs
@@ -2342,19 +2342,9 @@ impl<'a> State<'a> {
             ast::LitBool(val) => {
                 if val { word(&mut self.s, "true") } else { word(&mut self.s, "false") }
             }
-            ast::LitBinary(ref arr) => {
-                try!(self.ibox(indent_unit));
-                try!(word(&mut self.s, "["));
-                try!(self.commasep_cmnt(Inconsistent,
-                                        arr.as_slice(),
-                                        |s, u| {
-                                            word(&mut s.s,
-                                                 format!("{}",
-                                                         *u).as_slice())
-                                        },
-                                        |_| lit.span));
-                try!(word(&mut self.s, "]"));
-                self.end()
+            ast::LitBinary(ref v) => {
+                let escaped: String = v.iter().map(|&b| b as char).collect();
+                word(&mut self.s, format!("b\"{}\"", escaped.escape_default()).as_slice())
             }
         }
     }
diff --git a/src/test/compile-fail/byte-string-literals.rs b/src/test/compile-fail/byte-string-literals.rs
@@ -0,0 +1,23 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+
+// ignore-tidy-tab
+
+static FOO: &'static [u8] = b"\f";  //~ ERROR unknown byte string escape
+
+pub fn main() {
+    b"\f";  //~ ERROR unknown byte string escape
+    b"\x0Z";  //~ ERROR illegal character in numeric character escape: Z
+    b"é";  //~ ERROR byte string must be ASCII
+    b"a  //~ ERROR unterminated double quote byte string
+}
+
+
diff --git a/src/test/compile-fail/concat.rs b/src/test/compile-fail/concat.rs
@@ -10,6 +10,7 @@
 
 fn main() {
     concat!(b'f');  //~ ERROR: cannot concatenate a binary literal
+    concat!(b"foo");  //~ ERROR: cannot concatenate a binary literal
     concat!(foo);   //~ ERROR: expected a literal
     concat!(foo()); //~ ERROR: expected a literal
 }
diff --git a/src/test/run-pass/byte-literals.rs b/src/test/run-pass/byte-literals.rs

Original file line number	Diff line number	Diff line change
`@@ -529,6 +529,7 @@ pub fn compare_const_vals(a: &const_val, b: &const_val) -> Option<int> {`
`529`	`529`	`(&const_float(a), &const_float(b)) => compare_vals(a, b),`
`530`	`530`	`(&const_str(ref a), &const_str(ref b)) => compare_vals(a, b),`
`531`	`531`	`(&const_bool(a), &const_bool(b)) => compare_vals(a, b),`
	`532`	`+ (&const_binary(ref a), &const_binary(ref b)) => compare_vals(a, b),`
`532`	`533`	`_ => None`
`533`	`534`	`}`
`534`	`535`	`}`
Original file line number	Diff line number	Diff line change
`@@ -140,7 +140,8 @@ fn doit(sess: &parse::ParseSess, mut lexer: lexer::StringReader,`
`140`	`140`	`}`
`141`	`141`
`142`	`142`	`// text literals`
`143`		`- t::LIT_BYTE(..) \| t::LIT_CHAR(..) \| t::LIT_STR(..) \| t::LIT_STR_RAW(..) => "string",`
	`143`	`+ t::LIT_BYTE(..) \| t::LIT_BINARY(..) \|`
	`144`	`+ t::LIT_CHAR(..) \| t::LIT_STR(..) \| t::LIT_STR_RAW(..) => "string",`
`144`	`145`
`145`	`146`	`// number literals`
`146`	`147`	`t::LIT_INT(..) \| t::LIT_UINT(..) \| t::LIT_INT_UNSUFFIXED(..) \|`
Original file line number	Diff line number	Diff line change
`@@ -2342,19 +2342,9 @@ impl<'a> State<'a> {`
`2342`	`2342`	`ast::LitBool(val) => {`
`2343`	`2343`	`if val { word(&mut self.s, "true") } else { word(&mut self.s, "false") }`
`2344`	`2344`	`}`
`2345`		`- ast::LitBinary(ref arr) => {`
`2346`		`- try!(self.ibox(indent_unit));`
`2347`		`- try!(word(&mut self.s, "["));`
`2348`		`- try!(self.commasep_cmnt(Inconsistent,`
`2349`		`- arr.as_slice(),`
`2350`		`- \|s, u\| {`
`2351`		`- word(&mut s.s,`
`2352`		`- format!("{}",`
`2353`		`- *u).as_slice())`
`2354`		`- },`
`2355`		`- \|_\| lit.span));`
`2356`		`- try!(word(&mut self.s, "]"));`
`2357`		`- self.end()`
	`2345`	`+ ast::LitBinary(ref v) => {`
	`2346`	`+ let escaped: String = v.iter().map(\|&b\| b as char).collect();`
	`2347`	`+ word(&mut self.s, format!("b\"{}\"", escaped.escape_default()).as_slice())`
`2358`	`2348`	`}`
`2359`	`2349`	`}`
`2360`	`2350`	`}`