Auto merge of #113476 - fee1-dead-contrib:c-str-lit, r=petrochenkov
Reimplement C-str literals. This reverts #113334, cc `@fmease`. While converting lexer tokens to AST tokens in `rustc_parse`, we check the edition of the token's span: if the edition is earlier than 2021, we split the token in two, one being the identifier (`c` or `cr`) and the other being the string literal.
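For context on what the edition check means for user code: on editions before 2021, `c"…"` keeps lexing as two tokens (the identifier `c` followed by an ordinary string literal), so macros that relied on that tokenization continue to compile, while on edition 2021 and later it becomes a single C-string literal token. A rough illustration; the `two_tokens` macro below is made up purely to show the tokenization difference:

    macro_rules! two_tokens {
        // Matches an identifier followed by a string literal.
        ($i:ident $s:literal) => {
            (stringify!($i), $s)
        };
    }

    fn main() {
        // On edition 2015/2018, `c"hello"` is the ident `c` plus the string "hello",
        // so it satisfies the two-token matcher above. On edition 2021+ it lexes as
        // a single C-string literal token and would no longer match this pattern.
        let (prefix, s) = two_tokens!(c"hello");
        assert_eq!(prefix, "c");
        assert_eq!(s, "hello");
    }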
commit 23405bb123
12 changed files with 85 additions and 97 deletions
@@ -9,8 +9,8 @@ use rustc_ast::tokenstream::TokenStream;
 use rustc_ast::util::unicode::contains_text_flow_control_chars;
 use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey};
 use rustc_lexer::unescape::{self, EscapeError, Mode};
-use rustc_lexer::Cursor;
 use rustc_lexer::{Base, DocStyle, RawStrError};
+use rustc_lexer::{Cursor, LiteralKind};
 use rustc_session::lint::builtin::{
     RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
 };

@@ -118,6 +118,7 @@ impl<'a> StringReader<'a> {
         let mut swallow_next_invalid = 0;
         // Skip trivial (whitespace & comments) tokens
         loop {
+            let str_before = self.cursor.as_str();
             let token = self.cursor.advance_token();
             let start = self.pos;
             self.pos = self.pos + BytePos(token.len);

@@ -165,10 +166,7 @@ impl<'a> StringReader<'a> {
                     continue;
                 }
                 rustc_lexer::TokenKind::Ident => {
-                    let sym = nfc_normalize(self.str_from(start));
-                    let span = self.mk_sp(start, self.pos);
-                    self.sess.symbol_gallery.insert(sym, span);
-                    token::Ident(sym, false)
+                    self.ident(start)
                 }
                 rustc_lexer::TokenKind::RawIdent => {
                     let sym = nfc_normalize(self.str_from(start + BytePos(2)));

@@ -182,10 +180,7 @@ impl<'a> StringReader<'a> {
                 }
                 rustc_lexer::TokenKind::UnknownPrefix => {
                     self.report_unknown_prefix(start);
-                    let sym = nfc_normalize(self.str_from(start));
-                    let span = self.mk_sp(start, self.pos);
-                    self.sess.symbol_gallery.insert(sym, span);
-                    token::Ident(sym, false)
+                    self.ident(start)
                 }
                 rustc_lexer::TokenKind::InvalidIdent
                     // Do not recover an identifier with emoji if the codepoint is a confusable

@@ -203,6 +198,27 @@ impl<'a> StringReader<'a> {
                         .push(span);
                     token::Ident(sym, false)
                 }
+                // split up (raw) c string literals to an ident and a string literal when edition < 2021.
+                rustc_lexer::TokenKind::Literal {
+                    kind: kind @ (LiteralKind::CStr { .. } | LiteralKind::RawCStr { .. }),
+                    suffix_start: _,
+                } if !self.mk_sp(start, self.pos).edition().at_least_rust_2021() => {
+                    let prefix_len = match kind {
+                        LiteralKind::CStr { .. } => 1,
+                        LiteralKind::RawCStr { .. } => 2,
+                        _ => unreachable!(),
+                    };
+
+                    // reset the state so that only the prefix ("c" or "cr")
+                    // was consumed.
+                    let lit_start = start + BytePos(prefix_len);
+                    self.pos = lit_start;
+                    self.cursor = Cursor::new(&str_before[prefix_len as usize..]);
+
+                    self.report_unknown_prefix(start);
+                    let prefix_span = self.mk_sp(start, lit_start);
+                    return (Token::new(self.ident(start), prefix_span), preceded_by_whitespace);
+                }
                 rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
                     let suffix_start = start + BytePos(suffix_start);
                     let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);

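The hunk above is the heart of the change: `str_before`, saved at the top of the loop, lets the lexer rebuild its `Cursor` as if only the `c`/`cr` prefix had been consumed; it then returns a plain identifier token for the prefix, and the quoted part is re-lexed as an ordinary string literal on the next call. Below is a stripped-down sketch of that split-and-re-lex idea using toy types, not the real `StringReader`/`Cursor` API:

    // Toy illustration of "consume only the prefix, re-lex the rest".
    #[derive(Debug, PartialEq)]
    enum Tok {
        Ident(String),
        Str(String),
    }

    struct Lexer<'a> {
        rest: &'a str,
        edition_2021: bool,
    }

    impl<'a> Lexer<'a> {
        fn next_token(&mut self) -> Option<Tok> {
            // Snapshot the unconsumed input first, like `str_before` above.
            let before = self.rest;
            if let Some(stripped) = before.strip_prefix("c\"") {
                if !self.edition_2021 {
                    // Pre-2021 editions: rewind so only the `c` prefix is consumed;
                    // the quoted part gets lexed as a normal string on the next call.
                    self.rest = &before[1..];
                    return Some(Tok::Ident("c".to_string()));
                }
                // 2021+ would produce a single C-string token here (omitted).
                let end = stripped.find('"')?;
                self.rest = &stripped[end + 1..];
                return Some(Tok::Str(stripped[..end].to_string()));
            }
            if let Some(stripped) = before.strip_prefix('"') {
                let end = stripped.find('"')?;
                self.rest = &stripped[end + 1..];
                return Some(Tok::Str(stripped[..end].to_string()));
            }
            None
        }
    }

    fn main() {
        let mut lx = Lexer { rest: "c\"hi\"", edition_2021: false };
        assert_eq!(lx.next_token(), Some(Tok::Ident("c".into())));
        assert_eq!(lx.next_token(), Some(Tok::Str("hi".into())));
    }

Unlike this sketch, the real code also calls `self.report_unknown_prefix(start)` before returning the prefix token, so old-edition users still get the usual unknown-prefix diagnostic for `c"…"`.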
@@ -317,6 +333,13 @@ impl<'a> StringReader<'a> {
         }
     }
 
+    fn ident(&self, start: BytePos) -> TokenKind {
+        let sym = nfc_normalize(self.str_from(start));
+        let span = self.mk_sp(start, self.pos);
+        self.sess.symbol_gallery.insert(sym, span);
+        token::Ident(sym, false)
+    }
+
     fn struct_fatal_span_char(
         &self,
         from_pos: BytePos,