Emit only one nbsp error per file

2023-01-14 10:34:06 -08:00 · 2023-01-14 10:34:06 -08:00 · dab06ccdab
commit dab06ccdab
parent 39edcfa84e
3 changed files with 27 additions and 45 deletions
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -52,8 +52,15 @@ pub(crate) fn parse_token_trees<'a>(
    }
    let cursor = Cursor::new(src);
-    let string_reader =
+    let string_reader = StringReader {
-        StringReader { sess, start_pos, pos: start_pos, src, cursor, override_span };
+        sess,
+        start_pos,
+        pos: start_pos,
+        src,
+        cursor,
+        override_span,
+        nbsp_is_whitespace: false,
+    };
    tokentrees::TokenTreesReader::parse_all_token_trees(string_reader)
 }
@@ -68,6 +75,10 @@ struct StringReader<'a> {
    /// Cursor for getting lexer tokens.
    cursor: Cursor<'a>,
    override_span: Option<Span>,
+    /// When an "unknown start of token: \u{a0}" error has already been emitted
+    /// earlier in this file, it's safe to treat further occurrences of the
+    /// non-breaking space character as whitespace.
+    nbsp_is_whitespace: bool,
 }
 impl<'a> StringReader<'a> {
@@ -239,6 +250,16 @@ impl<'a> StringReader<'a> {
                    }
                    let mut it = self.str_from_to_end(start).chars();
                    let c = it.next().unwrap();
+                    if c == '\u{00a0}' {
+                        // If an error has already been reported on non-breaking
+                        // space characters earlier in the file, treat all
+                        // subsequent occurrences as whitespace.
+                        if self.nbsp_is_whitespace {
+                            preceded_by_whitespace = true;
+                            continue;
+                        }
+                        self.nbsp_is_whitespace = true;
+                    }
                    let repeats = it.take_while(|c1| *c1 == c).count();
                    let mut err =
                        self.struct_err_span_char(start, self.pos + Pos::from_usize(repeats * c.len_utf8()), "unknown start of token", c);
@@ -486,7 +507,7 @@ impl<'a> StringReader<'a> {
    /// Slice of the source text from `start` up to but excluding `self.pos`,
    /// meaning the slice does not include the character `self.ch`.
-    fn str_from(&self, start: BytePos) -> &str {
+    fn str_from(&self, start: BytePos) -> &'a str {
        self.str_from_to(start, self.pos)
    }
@@ -497,12 +518,12 @@ impl<'a> StringReader<'a> {
    }
    /// Slice of the source text spanning from `start` up to but excluding `end`.
-    fn str_from_to(&self, start: BytePos, end: BytePos) -> &str {
+    fn str_from_to(&self, start: BytePos, end: BytePos) -> &'a str {
        &self.src[self.src_index(start)..self.src_index(end)]
    }
    /// Slice of the source text spanning from `start` until the end
-    fn str_from_to_end(&self, start: BytePos) -> &str {
+    fn str_from_to_end(&self, start: BytePos) -> &'a str {
        &self.src[self.src_index(start)..]
    }
--- a/tests/ui/parser/unicode-chars.rs
+++ b/tests/ui/parser/unicode-chars.rs
@@ -6,10 +6,4 @@ fn main() {
    //~^ ERROR unknown start of token: \u{a0}
    //~^^ NOTE character appears 3 more times
    //~^^^ HELP Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is not
    //~^^^^ ERROR unknown start of token: \u{a0}
    //~^^^^^ HELP Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is not
    //~^^^^^^ ERROR unknown start of token: \u{a0}
    //~^^^^^^^ HELP Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is not
    //~^^^^^^^^ ERROR unknown start of token: \u{a0}
    //~^^^^^^^^^ HELP Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is not
 }
--- a/tests/ui/parser/unicode-chars.stderr
+++ b/tests/ui/parser/unicode-chars.stderr
@ -21,38 +21,5 @@ help: Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is
 LL |         let x = 0;
   |     ++++
-error: unknown start of token: \u{a0}
+error: aborting due to 2 previous errors
  --> $DIR/unicode-chars.rs:5:12
   |
 LL |         let x = 0;
   |            ^
   |
 help: Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is not
   |
 LL |         let x = 0;
   |            +
 error: unknown start of token: \u{a0}
  --> $DIR/unicode-chars.rs:5:14
   |
 LL |         let x = 0;
   |              ^
   |
 help: Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is not
   |
 LL |         let x = 0;
   |              +
 error: unknown start of token: \u{a0}
  --> $DIR/unicode-chars.rs:5:16
   |
 LL |         let x = 0;
   |                ^
   |
 help: Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is not
   |
 LL |         let x = 0;
   |                +
 error: aborting due to 5 previous errors