1
Fork 0

Handle CRLF properly in the lexer

The lexer already ignores CRLF in between tokens, but it doesn't
properly handle carriage returns inside strings and doc comments. Teach
it to treat CRLF as LF inside these tokens, and to disallow carriage
returns that are not followed by linefeeds. This includes handling an
escaped CRLF inside a regular string token the same way it handles an
escaped LF.

This is technically a breaking change, as bare carriage returns are no
longer allowed, and CRLF sequences are now treated as LF inside strings
and doc comments, but it's very unlikely to actually affect any
real-world code.

This change is necessary to have Rust code compile on Windows the same
way it does on Unix. The mozilla/rust repository explicitly sets eol=lf
for Rust source files, but other Rust repositories don't. Notably,
rust-http cannot be compiled on Windows without converting the CRLF line
endings back to LF.

[breaking-change]
This commit is contained in:
Kevin Ballard 2014-05-24 01:13:59 -07:00
parent d41058ed39
commit 8a8e497ae7
5 changed files with 215 additions and 23 deletions

View file

@ -288,6 +288,8 @@ mod test {
use owned_slice::OwnedSlice;
use ast;
use abi;
use attr;
use attr::AttrMetaMethods;
use parse::parser::Parser;
use parse::token::{str_to_ident};
use util::parser_testing::{string_to_tts, string_to_parser};
@ -726,4 +728,24 @@ mod test {
}".to_string());
}
#[test] fn crlf_doc_comments() {
let sess = new_parse_sess();
let name = "<source>".to_string();
let source = "/// doc comment\r\nfn foo() {}".to_string();
let item = parse_item_from_source_str(name.clone(), source, Vec::new(), &sess).unwrap();
let doc = attr::first_attr_value_str_by_name(item.attrs.as_slice(), "doc").unwrap();
assert_eq!(doc.get(), "/// doc comment");
let source = "/// doc comment\r\n/// line 2\r\nfn foo() {}".to_string();
let item = parse_item_from_source_str(name.clone(), source, Vec::new(), &sess).unwrap();
let docs = item.attrs.iter().filter(|a| a.name().get() == "doc")
.map(|a| a.value_str().unwrap().get().to_string()).collect::<Vec<_>>();
assert_eq!(docs.as_slice(), &["/// doc comment".to_string(), "/// line 2".to_string()]);
let source = "/** doc comment\r\n * with CRLF */\r\nfn foo() {}".to_string();
let item = parse_item_from_source_str(name, source, Vec::new(), &sess).unwrap();
let doc = attr::first_attr_value_str_by_name(item.attrs.as_slice(), "doc").unwrap();
assert_eq!(doc.get(), "/** doc comment\n * with CRLF */");
}
}