syntax: Handle \r\n in byte string literals
This ended up passing through the lexer but dying later on in parsing when it wasn't handled. The strategy taken was to copy the `str_lit` function, but adapt it for bytes. Closes #16278
This commit is contained in:
parent
6da38890f1
commit
74ae05ad90
3 changed files with 59 additions and 18 deletions
|
@ -21,6 +21,7 @@ use std::gc::Gc;
|
||||||
use std::io::File;
|
use std::io::File;
|
||||||
use std::rc::Rc;
|
use std::rc::Rc;
|
||||||
use std::str;
|
use std::str;
|
||||||
|
use std::iter;
|
||||||
|
|
||||||
pub mod lexer;
|
pub mod lexer;
|
||||||
pub mod parser;
|
pub mod parser;
|
||||||
|
@ -327,7 +328,7 @@ pub fn str_lit(lit: &str) -> String {
|
||||||
let error = |i| format!("lexer should have rejected {} at {}", lit, i);
|
let error = |i| format!("lexer should have rejected {} at {}", lit, i);
|
||||||
|
|
||||||
/// Eat everything up to a non-whitespace
|
/// Eat everything up to a non-whitespace
|
||||||
fn eat<'a>(it: &mut ::std::iter::Peekable<(uint, char), ::std::str::CharOffsets<'a>>) {
|
fn eat<'a>(it: &mut iter::Peekable<(uint, char), str::CharOffsets<'a>>) {
|
||||||
loop {
|
loop {
|
||||||
match it.peek().map(|x| x.val1()) {
|
match it.peek().map(|x| x.val1()) {
|
||||||
Some(' ') | Some('\n') | Some('\r') | Some('\t') => {
|
Some(' ') | Some('\n') | Some('\r') | Some('\t') => {
|
||||||
|
@ -471,35 +472,54 @@ pub fn binary_lit(lit: &str) -> Rc<Vec<u8>> {
|
||||||
// FIXME #8372: This could be a for-loop if it didn't borrow the iterator
|
// FIXME #8372: This could be a for-loop if it didn't borrow the iterator
|
||||||
let error = |i| format!("lexer should have rejected {} at {}", lit, i);
|
let error = |i| format!("lexer should have rejected {} at {}", lit, i);
|
||||||
|
|
||||||
|
/// Eat everything up to a non-whitespace
|
||||||
|
fn eat<'a, I: Iterator<(uint, u8)>>(it: &mut iter::Peekable<(uint, u8), I>) {
|
||||||
|
loop {
|
||||||
|
match it.peek().map(|x| x.val1()) {
|
||||||
|
Some(b' ') | Some(b'\n') | Some(b'\r') | Some(b'\t') => {
|
||||||
|
it.next();
|
||||||
|
},
|
||||||
|
_ => { break; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// binary literals *must* be ASCII, but the escapes don't have to be
|
// binary literals *must* be ASCII, but the escapes don't have to be
|
||||||
let mut chars = lit.as_bytes().iter().enumerate().peekable();
|
let mut chars = lit.bytes().enumerate().peekable();
|
||||||
loop {
|
loop {
|
||||||
match chars.next() {
|
match chars.next() {
|
||||||
Some((i, &c)) => {
|
Some((i, b'\\')) => {
|
||||||
if c == b'\\' {
|
let em = error(i);
|
||||||
if *chars.peek().expect(error(i).as_slice()).val1() == b'\n' {
|
match chars.peek().expect(em.as_slice()).val1() {
|
||||||
loop {
|
b'\n' => eat(&mut chars),
|
||||||
// eat everything up to a non-whitespace
|
b'\r' => {
|
||||||
match chars.peek().map(|x| *x.val1()) {
|
chars.next();
|
||||||
Some(b' ') | Some(b'\n') | Some(b'\r') | Some(b'\t') => {
|
if chars.peek().expect(em.as_slice()).val1() != b'\n' {
|
||||||
chars.next();
|
fail!("lexer accepted bare CR");
|
||||||
},
|
|
||||||
_ => { break; }
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else {
|
eat(&mut chars);
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
// otherwise, a normal escape
|
// otherwise, a normal escape
|
||||||
let (c, n) = byte_lit(lit.slice_from(i));
|
let (c, n) = byte_lit(lit.slice_from(i));
|
||||||
for _ in range(0, n - 1) { // we don't need to move past the first \
|
// we don't need to move past the first \
|
||||||
|
for _ in range(0, n - 1) {
|
||||||
chars.next();
|
chars.next();
|
||||||
}
|
}
|
||||||
res.push(c);
|
res.push(c);
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
res.push(c);
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
None => { break; }
|
Some((i, b'\r')) => {
|
||||||
|
let em = error(i);
|
||||||
|
if chars.peek().expect(em.as_slice()).val1() != b'\n' {
|
||||||
|
fail!("lexer accepted bare CR");
|
||||||
|
}
|
||||||
|
chars.next();
|
||||||
|
res.push(b'\n');
|
||||||
|
}
|
||||||
|
Some((_, c)) => res.push(c),
|
||||||
|
None => break,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
1
src/test/run-pass/.gitattributes
vendored
1
src/test/run-pass/.gitattributes
vendored
|
@ -1 +1,2 @@
|
||||||
lexer-crlf-line-endings-string-literal-doc-comment.rs -text
|
lexer-crlf-line-endings-string-literal-doc-comment.rs -text
|
||||||
|
issue-16278.rs -text
|
||||||
|
|
20
src/test/run-pass/issue-16278.rs
Normal file
20
src/test/run-pass/issue-16278.rs
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||||
|
// file at the top-level directory of this distribution and at
|
||||||
|
// http://rust-lang.org/COPYRIGHT.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||||
|
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||||
|
// option. This file may not be copied, modified, or distributed
|
||||||
|
// except according to those terms.
|
||||||
|
|
||||||
|
// ignore-tidy-cr
|
||||||
|
|
||||||
|
// this file has some special \r\n endings (use xxd to see them)
|
||||||
|
|
||||||
|
fn main() {assert_eq!(b"", b"\
|
||||||
|
");
|
||||||
|
assert_eq!(b"\n", b"
|
||||||
|
");
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue