Rollup merge of #108801 - fee1-dead-contrib:c-str, r=compiler-errors

Implement RFC 3348, `c"foo"` literals

RFC: https://github.com/rust-lang/rfcs/pull/3348
Tracking issue: #105723
This commit is contained in:
Dylan DPC 2023-05-05 18:40:33 +05:30 committed by GitHub
commit 4891f02cff
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
33 changed files with 503 additions and 156 deletions

View file

@ -1,3 +1,5 @@
use std::ops::Range;
use crate::errors;
use crate::lexer::unicode_chars::UNICODE_ARRAY;
use crate::make_unclosed_delims_error;
@ -6,7 +8,7 @@ use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind};
use rustc_ast::tokenstream::TokenStream;
use rustc_ast::util::unicode::contains_text_flow_control_chars;
use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey};
use rustc_lexer::unescape::{self, Mode};
use rustc_lexer::unescape::{self, EscapeError, Mode};
use rustc_lexer::Cursor;
use rustc_lexer::{Base, DocStyle, RawStrError};
use rustc_session::lint::builtin::{
@ -204,6 +206,9 @@ impl<'a> StringReader<'a> {
rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
let suffix_start = start + BytePos(suffix_start);
let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
if let token::LitKind::CStr | token::LitKind::CStrRaw(_) = kind {
self.sess.gated_spans.gate(sym::c_str_literals, self.mk_sp(start, self.pos));
}
let suffix = if suffix_start < self.pos {
let string = self.str_from(suffix_start);
if string == "_" {
@ -415,6 +420,16 @@ impl<'a> StringReader<'a> {
}
self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
}
rustc_lexer::LiteralKind::CStr { terminated } => {
if !terminated {
self.sess.span_diagnostic.span_fatal_with_code(
self.mk_sp(start + BytePos(1), end),
"unterminated C string",
error_code!(E0767),
)
}
self.cook_c_string(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
}
rustc_lexer::LiteralKind::RawStr { n_hashes } => {
if let Some(n_hashes) = n_hashes {
let n = u32::from(n_hashes);
@ -433,6 +448,15 @@ impl<'a> StringReader<'a> {
self.report_raw_str_error(start, 2);
}
}
rustc_lexer::LiteralKind::RawCStr { n_hashes } => {
if let Some(n_hashes) = n_hashes {
let n = u32::from(n_hashes);
let kind = token::CStrRaw(n_hashes);
self.cook_c_string(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
} else {
self.report_raw_str_error(start, 2);
}
}
rustc_lexer::LiteralKind::Int { base, empty_int } => {
if empty_int {
let span = self.mk_sp(start, end);
@ -648,7 +672,7 @@ impl<'a> StringReader<'a> {
self.sess.emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
}
fn cook_quoted(
fn cook_common(
&self,
kind: token::LitKind,
mode: Mode,
@ -656,12 +680,13 @@ impl<'a> StringReader<'a> {
end: BytePos,
prefix_len: u32,
postfix_len: u32,
unescape: fn(&str, Mode, &mut dyn FnMut(Range<usize>, Result<(), EscapeError>)),
) -> (token::LitKind, Symbol) {
let mut has_fatal_err = false;
let content_start = start + BytePos(prefix_len);
let content_end = end - BytePos(postfix_len);
let lit_content = self.str_from_to(content_start, content_end);
unescape::unescape_literal(lit_content, mode, &mut |range, result| {
unescape(lit_content, mode, &mut |range, result| {
// Here we only check for errors. The actual unescaping is done later.
if let Err(err) = result {
let span_with_quotes = self.mk_sp(start, end);
@ -692,6 +717,38 @@ impl<'a> StringReader<'a> {
(token::Err, self.symbol_from_to(start, end))
}
}
fn cook_quoted(
&self,
kind: token::LitKind,
mode: Mode,
start: BytePos,
end: BytePos,
prefix_len: u32,
postfix_len: u32,
) -> (token::LitKind, Symbol) {
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
unescape::unescape_literal(src, mode, &mut |span, result| {
callback(span, result.map(drop))
})
})
}
fn cook_c_string(
&self,
kind: token::LitKind,
mode: Mode,
start: BytePos,
end: BytePos,
prefix_len: u32,
postfix_len: u32,
) -> (token::LitKind, Symbol) {
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
unescape::unescape_c_string(src, mode, &mut |span, result| {
callback(span, result.map(drop))
})
})
}
}
pub fn nfc_normalize(string: &str) -> Symbol {