Rollup merge of #134192 - nnethercote:rm-Lexer-Parser-dep, r=compiler-errors

Remove `Lexer`'s dependency on `Parser`. Lexing precedes parsing, as you'd expect: `Lexer` creates a `TokenStream` and `Parser` then parses that `TokenStream`. But, in a horrendous violation of layering abstractions and common sense, `Lexer` depends on `Parser`! The `Lexer::unclosed_delim_err` method does some error recovery that relies on creating a `Parser` to do some post-processing of the `TokenStream` that the `Lexer` just created. This commit just removes `unclosed_delim_err`. This change removes `Lexer`'s dependency on `Parser`, and also means that `lex_token_tree`'s return value can have a more typical form. The cost is slightly worse error messages in two obscure cases, as shown in these tests: - tests/ui/parser/brace-in-let-chain.rs: there is slightly less explanation in this case involving an extra `{`. - tests/ui/parser/diff-markers/unclosed-delims{,-in-macro}.rs: the diff marker detection is no longer supported (because that detection is implemented in the parser). In my opinion this cost is outweighed by the magnitude of the code cleanup. r? `@chenyukang`
2024-12-14 05:01:06 +01:00 · 2024-12-14 05:01:06 +01:00 · ac6ac81a67
commit ac6ac81a67
parent 03e328d178 2e412fef75
8 changed files with 67 additions and 173 deletions
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@ -69,24 +69,30 @@ pub(crate) fn lex_token_trees<'psess, 'src>(
        token: Token::dummy(),
        diag_info: TokenTreeDiagInfo::default(),
    };
-    let (_open_spacing, stream, res) = lexer.lex_token_trees(/* is_delimited */ false);
-    let unmatched_delims = lexer.diag_info.unmatched_delims;
+    let res = lexer.lex_token_trees(/* is_delimited */ false);

-    if res.is_ok() && unmatched_delims.is_empty() {
-        Ok(stream)
-    } else {
-        // Return error if there are unmatched delimiters or unclosed delimiters.
-        // We emit delimiter mismatch errors first, then emit the unclosing delimiter mismatch
-        // because the delimiter mismatch is more likely to be the root cause of error
-        let mut buffer: Vec<_> = unmatched_delims
+    let mut unmatched_delims: Vec<_> = lexer
+        .diag_info
+        .unmatched_delims
        .into_iter()
        .filter_map(|unmatched_delim| make_unclosed_delims_error(unmatched_delim, psess))
        .collect();
-        if let Err(errs) = res {
-            // Add unclosing delimiter or diff marker errors
-            buffer.extend(errs);
+
+    match res {
+        Ok((_open_spacing, stream)) => {
+            if unmatched_delims.is_empty() {
+                Ok(stream)
+            } else {
+                // Return error if there are unmatched delimiters or unclosed delimiters.
+                Err(unmatched_delims)
+            }
+        }
+        Err(errs) => {
+            // We emit delimiter mismatch errors first, then emit the unclosing delimiter mismatch
+            // because the delimiter mismatch is more likely to be the root cause of error
+            unmatched_delims.extend(errs);
+            Err(unmatched_delims)
        }
-        Err(buffer)
    }
 }

--- a/compiler/rustc_parse/src/lexer/tokentrees.rs
+++ b/compiler/rustc_parse/src/lexer/tokentrees.rs
@ -1,12 +1,10 @@
 use rustc_ast::token::{self, Delimiter, Token};
 use rustc_ast::tokenstream::{DelimSpacing, DelimSpan, Spacing, TokenStream, TokenTree};
 use rustc_ast_pretty::pprust::token_to_string;
-use rustc_errors::{Applicability, Diag};
-use rustc_span::symbol::kw;
+use rustc_errors::Diag;

 use super::diagnostics::{report_suspicious_mismatch_block, same_indentation_level};
 use super::{Lexer, UnmatchedDelim};
-use crate::Parser;

 impl<'psess, 'src> Lexer<'psess, 'src> {
    // Lex into a token stream. The `Spacing` in the result is that of the
@ -14,7 +12,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
    pub(super) fn lex_token_trees(
        &mut self,
        is_delimited: bool,
-    ) -> (Spacing, TokenStream, Result<(), Vec<Diag<'psess>>>) {
+    ) -> Result<(Spacing, TokenStream), Vec<Diag<'psess>>> {
        // Move past the opening delimiter.
        let open_spacing = self.bump_minimal();

@ -27,25 +25,25 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                    debug_assert!(!matches!(delim, Delimiter::Invisible(_)));
                    buf.push(match self.lex_token_tree_open_delim(delim) {
                        Ok(val) => val,
-                        Err(errs) => return (open_spacing, TokenStream::new(buf), Err(errs)),
+                        Err(errs) => return Err(errs),
                    })
                }
                token::CloseDelim(delim) => {
                    // Invisible delimiters cannot occur here because `TokenTreesReader` parses
                    // code directly from strings, with no macro expansion involved.
                    debug_assert!(!matches!(delim, Delimiter::Invisible(_)));
-                    return (
-                        open_spacing,
-                        TokenStream::new(buf),
-                        if is_delimited { Ok(()) } else { Err(vec![self.close_delim_err(delim)]) },
-                    );
+                    return if is_delimited {
+                        Ok((open_spacing, TokenStream::new(buf)))
+                    } else {
+                        Err(vec![self.close_delim_err(delim)])
+                    };
                }
                token::Eof => {
-                    return (
-                        open_spacing,
-                        TokenStream::new(buf),
-                        if is_delimited { Err(vec![self.eof_err()]) } else { Ok(()) },
-                    );
+                    return if is_delimited {
+                        Err(vec![self.eof_err()])
+                    } else {
+                        Ok((open_spacing, TokenStream::new(buf)))
+                    };
                }
                _ => {
                    // Get the next normal token.
@ -107,10 +105,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
        // Lex the token trees within the delimiters.
        // We stop at any delimiter so we can try to recover if the user
        // uses an incorrect delimiter.
-        let (open_spacing, tts, res) = self.lex_token_trees(/* is_delimited */ true);
-        if let Err(errs) = res {
-            return Err(self.unclosed_delim_err(tts, errs));
-        }
+        let (open_spacing, tts) = self.lex_token_trees(/* is_delimited */ true)?;

        // Expand to cover the entire delimited token tree.
        let delim_span = DelimSpan::from_pair(pre_span, self.token.span);
@ -247,67 +242,6 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
        this_spacing
    }

-    fn unclosed_delim_err(
-        &mut self,
-        tts: TokenStream,
-        mut errs: Vec<Diag<'psess>>,
-    ) -> Vec<Diag<'psess>> {
-        // If there are unclosed delims, see if there are diff markers and if so, point them
-        // out instead of complaining about the unclosed delims.
-        let mut parser = Parser::new(self.psess, tts, None);
-        let mut diff_errs = vec![];
-        // Suggest removing a `{` we think appears in an `if`/`while` condition.
-        // We want to suggest removing a `{` only if we think we're in an `if`/`while` condition,
-        // but we have no way of tracking this in the lexer itself, so we piggyback on the parser.
-        let mut in_cond = false;
-        while parser.token != token::Eof {
-            if let Err(diff_err) = parser.err_vcs_conflict_marker() {
-                diff_errs.push(diff_err);
-            } else if parser.is_keyword_ahead(0, &[kw::If, kw::While]) {
-                in_cond = true;
-            } else if matches!(
-                parser.token.kind,
-                token::CloseDelim(Delimiter::Brace) | token::FatArrow
-            ) {
-                // End of the `if`/`while` body, or the end of a `match` guard.
-                in_cond = false;
-            } else if in_cond && parser.token == token::OpenDelim(Delimiter::Brace) {
-                // Store the `&&` and `let` to use their spans later when creating the diagnostic
-                let maybe_andand = parser.look_ahead(1, |t| t.clone());
-                let maybe_let = parser.look_ahead(2, |t| t.clone());
-                if maybe_andand == token::OpenDelim(Delimiter::Brace) {
-                    // This might be the beginning of the `if`/`while` body (i.e., the end of the
-                    // condition).
-                    in_cond = false;
-                } else if maybe_andand == token::AndAnd && maybe_let.is_keyword(kw::Let) {
-                    let mut err = parser.dcx().struct_span_err(
-                        parser.token.span,
-                        "found a `{` in the middle of a let-chain",
-                    );
-                    err.span_suggestion(
-                        parser.token.span,
-                        "consider removing this brace to parse the `let` as part of the same chain",
-                        "",
-                        Applicability::MachineApplicable,
-                    );
-                    err.span_label(
-                        maybe_andand.span.to(maybe_let.span),
-                        "you might have meant to continue the let-chain here",
-                    );
-                    errs.push(err);
-                }
-            }
-            parser.bump();
-        }
-        if !diff_errs.is_empty() {
-            for err in errs {
-                err.cancel();
-            }
-            return diff_errs;
-        }
-        errs
-    }
-
    fn close_delim_err(&mut self, delim: Delimiter) -> Diag<'psess> {
        // An unexpected closing delimiter (i.e., there is no matching opening delimiter).
        let token_str = token_to_string(&self.token);
--- a/tests/ui/parser/brace-in-let-chain.rs
+++ b/tests/ui/parser/brace-in-let-chain.rs
@ -3,7 +3,7 @@
 #![feature(let_chains)]
 fn main() {
    if let () = ()
-        && let () = () { //~ERROR: found a `{` in the middle of a let-chain
+        && let () = () {
        && let () = ()
    {
    }
@ -11,7 +11,7 @@ fn main() {

 fn quux() {
    while let () = ()
-        && let () = () { //~ERROR: found a `{` in the middle of a let-chain
+        && let () = () {
        && let () = ()
    {
    }
--- a/tests/ui/parser/brace-in-let-chain.stderr
+++ b/tests/ui/parser/brace-in-let-chain.stderr
@ -27,33 +27,5 @@ LL |     }
 LL | }
   |                                                     ^

-error: found a `{` in the middle of a let-chain
-  --> $DIR/brace-in-let-chain.rs:14:24
-   |
-LL |         && let () = () {
-   |                        ^
-LL |         && let () = ()
-   |         ------ you might have meant to continue the let-chain here
-   |
-help: consider removing this brace to parse the `let` as part of the same chain
-   |
-LL -         && let () = () {
-LL +         && let () = ()
-   |
-
-error: found a `{` in the middle of a let-chain
-  --> $DIR/brace-in-let-chain.rs:6:24
-   |
-LL |         && let () = () {
-   |                        ^
-LL |         && let () = ()
-   |         ------ you might have meant to continue the let-chain here
-   |
-help: consider removing this brace to parse the `let` as part of the same chain
-   |
-LL -         && let () = () {
-LL +         && let () = ()
-   |
-
-error: aborting due to 3 previous errors
+error: aborting due to 1 previous error

--- a/tests/ui/parser/diff-markers/unclosed-delims-in-macro.rs
+++ b/tests/ui/parser/diff-markers/unclosed-delims-in-macro.rs
@ -1,9 +1,11 @@
+// The diff marker detection was removed for this example, because it relied on
+// the lexer having a dependency on the parser, which was horrible.
+
 macro_rules! foo {
 <<<<<<< HEAD
-    //~^ ERROR encountered diff marker
    () {
 =======
    () { //
 >>>>>>> 7a4f13c blah blah blah
    }
-}
+} //~ this file contains an unclosed delimiter
--- a/tests/ui/parser/diff-markers/unclosed-delims-in-macro.stderr
+++ b/tests/ui/parser/diff-markers/unclosed-delims-in-macro.stderr
@ -1,23 +1,16 @@
-error: encountered diff marker
-  --> $DIR/unclosed-delims-in-macro.rs:2:1
+error: this file contains an unclosed delimiter
+  --> $DIR/unclosed-delims-in-macro.rs:11:48
   |
+LL | macro_rules! foo {
+   |                  - unclosed delimiter
 LL | <<<<<<< HEAD
-   | ^^^^^^^ between this marker and `=======` is the code that we're merging into
+LL |     () {
+   |        - this delimiter might not be properly closed...
 ...
-LL | =======
-   | ------- between this marker and `>>>>>>>` is the incoming code
-LL |     () { //
-LL | >>>>>>> 7a4f13c blah blah blah
-   | ^^^^^^^ this marker concludes the conflict region
-   |
-   = note: conflict markers indicate that a merge was started but could not be completed due to merge conflicts
-           to resolve a conflict, keep only the code you want and then delete the lines containing conflict markers
-   = help: if you're having merge conflicts after pulling new code:
-           the top section is the code you already had and the bottom section is the remote code
-           if you're in the middle of a rebase:
-           the top section is the code being rebased onto and the bottom section is the code coming from the current commit being rebased
-   = note: for an explanation on these markers from the `git` documentation:
-           visit <https://git-scm.com/book/en/v2/Git-Tools-Advanced-Merging#_checking_out_conflicts>
+LL | }
+   | -                                             ^
+   | |
+   | ...as it matches this but it has different indentation

 error: aborting due to 1 previous error

--- a/tests/ui/parser/diff-markers/unclosed-delims.rs
+++ b/tests/ui/parser/diff-markers/unclosed-delims.rs
@ -1,18 +1,12 @@
+// The diff marker detection was removed for this example, because it relied on
+// the lexer having a dependency on the parser, which was horrible.
+
 mod tests {
    #[test]
 <<<<<<< HEAD
-//~^ ERROR encountered diff marker
-//~| NOTE between this marker and `=======`
-
-//~| NOTE conflict markers indicate that
-//~| HELP if you're having merge conflicts
-//~| NOTE for an explanation on these markers
-
    fn test1() {
 =======
-//~^ NOTE between this marker and `>>>>>>>`
    fn test2() {
 >>>>>>> 7a4f13c blah blah blah
-//~^ NOTE this marker concludes the conflict region
    }
-}
+} //~ this file contains an unclosed delimiter
--- a/tests/ui/parser/diff-markers/unclosed-delims.stderr
+++ b/tests/ui/parser/diff-markers/unclosed-delims.stderr
@ -1,23 +1,16 @@
-error: encountered diff marker
-  --> $DIR/unclosed-delims.rs:3:1
+error: this file contains an unclosed delimiter
+  --> $DIR/unclosed-delims.rs:12:48
   |
-LL | <<<<<<< HEAD
-   | ^^^^^^^ between this marker and `=======` is the code that we're merging into
+LL | mod tests {
+   |           - unclosed delimiter
 ...
-LL | =======
-   | ------- between this marker and `>>>>>>>` is the incoming code
+LL |     fn test1() {
+   |                - this delimiter might not be properly closed...
 ...
-LL | >>>>>>> 7a4f13c blah blah blah
-   | ^^^^^^^ this marker concludes the conflict region
-   |
-   = note: conflict markers indicate that a merge was started but could not be completed due to merge conflicts
-           to resolve a conflict, keep only the code you want and then delete the lines containing conflict markers
-   = help: if you're having merge conflicts after pulling new code:
-           the top section is the code you already had and the bottom section is the remote code
-           if you're in the middle of a rebase:
-           the top section is the code being rebased onto and the bottom section is the code coming from the current commit being rebased
-   = note: for an explanation on these markers from the `git` documentation:
-           visit <https://git-scm.com/book/en/v2/Git-Tools-Advanced-Merging#_checking_out_conflicts>
+LL | }
+   | -                                             ^
+   | |
+   | ...as it matches this but it has different indentation

 error: aborting due to 1 previous error