Add SSE2 accelerated version of FileMap analysis.

2018-05-29 17:50:13 +02:00 · 2018-05-29 17:50:13 +02:00 · 5a6dc8c4f5
commit 5a6dc8c4f5
parent 3497138634
4 changed files with 445 additions and 68 deletions
--- a/src/libsyntax_pos/lib.rs
+++ b/src/libsyntax_pos/lib.rs
@ -24,6 +24,7 @@
 #![feature(optin_builtin_traits)]
 #![allow(unused_attributes)]
 #![feature(specialization)]
+#![feature(stdsimd)]

 use std::borrow::Cow;
 use std::cell::Cell;
@ -47,6 +48,9 @@ use serialize::{Encodable, Decodable, Encoder, Decoder};
 extern crate serialize;
 extern crate serialize as rustc_serialize; // used by deriving

+#[macro_use]
+extern crate cfg_if;
+
 extern crate unicode_width;

 pub mod edition;
@ -58,6 +62,8 @@ pub use span_encoding::{Span, DUMMY_SP};

 pub mod symbol;

+mod analyze_filemap;
+
 pub struct Globals {
    symbol_interner: Lock<symbol::Interner>,
    span_interner: Lock<span_encoding::SpanInterner>,
@ -652,7 +658,7 @@ impl From<Vec<Span>> for MultiSpan {
 pub const NO_EXPANSION: SyntaxContext = SyntaxContext::empty();

 /// Identifies an offset of a multi-byte character in a FileMap
-#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq)]
+#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq, Debug)]
 pub struct MultiByteChar {
    /// The absolute offset of the character in the CodeMap
    pub pos: BytePos,
@ -661,7 +667,7 @@ pub struct MultiByteChar {
 }

 /// Identifies an offset of a non-narrow character in a FileMap
-#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq)]
+#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq, Debug)]
 pub enum NonNarrowChar {
    /// Represents a zero-width character
    ZeroWidth(BytePos),
@ -950,7 +956,7 @@ impl FileMap {
        let end_pos = start_pos.to_usize() + src.len();

        let (lines, multibyte_chars, non_narrow_chars) =
-            Self::find_newlines_and_special_chars(&src[..], start_pos);
+            analyze_filemap::analyze_filemap(&src[..], start_pos);

        FileMap {
            name,
@ -969,71 +975,6 @@ impl FileMap {
        }
    }

-    fn find_newlines_and_special_chars(src: &str, filemap_start_pos: BytePos)
-        -> (Vec<BytePos>, Vec<MultiByteChar>, Vec<NonNarrowChar>) {
-
-        let mut index = 0;
-        let mut lines = vec![filemap_start_pos];
-        let mut multibyte_chars = vec![];
-        let mut non_narrow_chars = vec![];
-
-        while index < src.len() {
-            let byte_pos = BytePos::from_usize(index) + filemap_start_pos;
-            let byte = src.as_bytes()[index];
-
-            if byte.is_ascii() {
-                match byte {
-                    b'\n' => {
-                        lines.push(byte_pos + BytePos(1));
-                    }
-                    b'\t' => {
-                        // Tabs will consume 4 columns.
-                        non_narrow_chars.push(NonNarrowChar::new(byte_pos, 4));
-                    }
-                    c => if c.is_ascii_control() {
-                        // Assume control characters are zero width.
-                        non_narrow_chars.push(NonNarrowChar::new(byte_pos, 0));
-                    }
-                }
-
-                index += 1;
-            } else {
-                let c = (&src[index..]).chars().next().unwrap();
-                let c_len = c.len_utf8();
-
-                if c_len > 1 {
-                    assert!(c_len >=2 && c_len <= 4);
-                    let mbc = MultiByteChar {
-                        pos: byte_pos,
-                        bytes: c_len,
-                    };
-                    multibyte_chars.push(mbc);
-                }
-
-                // Assume control characters are zero width.
-                // FIXME: How can we decide between `width` and `width_cjk`?
-                let c_width = unicode_width::UnicodeWidthChar::width(c).unwrap_or(0);
-
-                if c_width != 1 {
-                    non_narrow_chars.push(NonNarrowChar::new(byte_pos, c_width));
-                }
-
-                index += c_len;
-            }
-        }
-
-        // The loop above optimistically registers a new line *after* each of \n
-        // it encounters. If that point is already outside the filemap, remove
-        // it again.
-        if let Some(&last_line_start) = lines.last() {
-            if last_line_start == filemap_start_pos + BytePos::from_usize(src.len()) {
-                lines.pop();
-            }
-        }
-
-        (lines, multibyte_chars, non_narrow_chars)
-    }
-
    /// Return the BytePos of the beginning of the current line.
    pub fn line_begin_pos(&self) -> BytePos {
        match self.lines.last() {