convert \r\n to \n when loading files
This commit is contained in:
parent
60960a260f
commit
004f3acec1
2 changed files with 76 additions and 0 deletions
|
@ -1045,6 +1045,7 @@ impl SourceFile {
|
|||
mut src: String,
|
||||
start_pos: BytePos) -> Result<SourceFile, OffsetOverflowError> {
|
||||
remove_bom(&mut src);
|
||||
normalize_newlines(&mut src);
|
||||
|
||||
let src_hash = {
|
||||
let mut hasher: StableHasher<u128> = StableHasher::new();
|
||||
|
@ -1212,6 +1213,61 @@ fn remove_bom(src: &mut String) {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/// Replaces `\r\n` with `\n` in-place in `src`.
|
||||
///
|
||||
/// Returns error if there's a lone `\r` in the string
|
||||
fn normalize_newlines(src: &mut String) {
|
||||
if !src.as_bytes().contains(&b'\r') {
|
||||
return;
|
||||
}
|
||||
|
||||
// We replace `\r\n` with `\n` in-place, which doesn't break utf-8 encoding.
|
||||
// While we *can* call `as_mut_vec` and do surgery on the live string
|
||||
// directly, let's rather steal the contents of `src`. This makes the code
|
||||
// safe even if a panic occurs.
|
||||
|
||||
let mut buf = std::mem::replace(src, String::new()).into_bytes();
|
||||
let mut gap_len = 0;
|
||||
let mut tail = buf.as_mut_slice();
|
||||
loop {
|
||||
let idx = match find_crlf(&tail[gap_len..]) {
|
||||
None => tail.len(),
|
||||
Some(idx) => idx + gap_len,
|
||||
};
|
||||
tail.copy_within(gap_len..idx, 0);
|
||||
tail = &mut tail[idx - gap_len..];
|
||||
if tail.len() == gap_len {
|
||||
break;
|
||||
}
|
||||
gap_len += 1;
|
||||
}
|
||||
|
||||
// Account for removed `\r`.
|
||||
// After `set_len`, `buf` is guaranteed to contain utf-8 again.
|
||||
let new_len = buf.len() - gap_len;
|
||||
unsafe {
|
||||
buf.set_len(new_len);
|
||||
*src = String::from_utf8_unchecked(buf);
|
||||
}
|
||||
|
||||
fn find_crlf(src: &[u8]) -> Option<usize> {
|
||||
let mut search_idx = 0;
|
||||
while let Some(idx) = find_cr(&src[search_idx..]) {
|
||||
if src[search_idx..].get(idx + 1) != Some(&b'\n') {
|
||||
search_idx += idx + 1;
|
||||
continue;
|
||||
}
|
||||
return Some(search_idx + idx);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn find_cr(src: &[u8]) -> Option<usize> {
|
||||
src.iter().position(|&b| b == b'\r')
|
||||
}
|
||||
}
|
||||
|
||||
// _____________________________________________________________________________
|
||||
// Pos, BytePos, CharPos
|
||||
//
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue