Rollup merge of #108801 - fee1-dead-contrib:c-str, r=compiler-errors
Implement RFC 3348, `c"foo"` literals RFC: https://github.com/rust-lang/rfcs/pull/3348 Tracking issue: #105723
This commit is contained in:
commit
4891f02cff
33 changed files with 503 additions and 156 deletions
|
@ -1821,6 +1821,8 @@ pub enum LitKind {
|
|||
/// A byte string (`b"foo"`). Not stored as a symbol because it might be
|
||||
/// non-utf8, and symbols only allow utf8 strings.
|
||||
ByteStr(Lrc<[u8]>, StrStyle),
|
||||
/// A C String (`c"foo"`). Guaranteed to only have `\0` at the end.
|
||||
CStr(Lrc<[u8]>, StrStyle),
|
||||
/// A byte char (`b'f'`).
|
||||
Byte(u8),
|
||||
/// A character literal (`'a'`).
|
||||
|
@ -1875,6 +1877,7 @@ impl LitKind {
|
|||
// unsuffixed variants
|
||||
LitKind::Str(..)
|
||||
| LitKind::ByteStr(..)
|
||||
| LitKind::CStr(..)
|
||||
| LitKind::Byte(..)
|
||||
| LitKind::Char(..)
|
||||
| LitKind::Int(_, LitIntType::Unsuffixed)
|
||||
|
|
|
@ -74,6 +74,8 @@ pub enum LitKind {
|
|||
StrRaw(u8), // raw string delimited by `n` hash symbols
|
||||
ByteStr,
|
||||
ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols
|
||||
CStr,
|
||||
CStrRaw(u8),
|
||||
Err,
|
||||
}
|
||||
|
||||
|
@ -141,6 +143,10 @@ impl fmt::Display for Lit {
|
|||
delim = "#".repeat(n as usize),
|
||||
string = symbol
|
||||
)?,
|
||||
CStr => write!(f, "c\"{symbol}\"")?,
|
||||
CStrRaw(n) => {
|
||||
write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))?
|
||||
}
|
||||
Integer | Float | Bool | Err => write!(f, "{symbol}")?,
|
||||
}
|
||||
|
||||
|
@ -170,6 +176,7 @@ impl LitKind {
|
|||
Float => "float",
|
||||
Str | StrRaw(..) => "string",
|
||||
ByteStr | ByteStrRaw(..) => "byte string",
|
||||
CStr | CStrRaw(..) => "C string",
|
||||
Err => "error",
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,9 +2,13 @@
|
|||
|
||||
use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
|
||||
use crate::token::{self, Token};
|
||||
use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode};
|
||||
use rustc_lexer::unescape::{
|
||||
byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit,
|
||||
Mode,
|
||||
};
|
||||
use rustc_span::symbol::{kw, sym, Symbol};
|
||||
use rustc_span::Span;
|
||||
use std::ops::Range;
|
||||
use std::{ascii, fmt, str};
|
||||
|
||||
// Escapes a string, represented as a symbol. Reuses the original symbol,
|
||||
|
@ -35,6 +39,7 @@ pub enum LitError {
|
|||
InvalidFloatSuffix,
|
||||
NonDecimalFloat(u32),
|
||||
IntTooLarge(u32),
|
||||
NulInCStr(Range<usize>),
|
||||
}
|
||||
|
||||
impl LitKind {
|
||||
|
@ -158,6 +163,52 @@ impl LitKind {
|
|||
|
||||
LitKind::ByteStr(bytes.into(), StrStyle::Raw(n))
|
||||
}
|
||||
token::CStr => {
|
||||
let s = symbol.as_str();
|
||||
let mut buf = Vec::with_capacity(s.len());
|
||||
let mut error = Ok(());
|
||||
unescape_c_string(s, Mode::CStr, &mut |span, c| match c {
|
||||
Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
|
||||
error = Err(LitError::NulInCStr(span));
|
||||
}
|
||||
Ok(CStrUnit::Byte(b)) => buf.push(b),
|
||||
Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
|
||||
Ok(CStrUnit::Char(c)) => {
|
||||
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
|
||||
}
|
||||
Err(err) => {
|
||||
if err.is_fatal() {
|
||||
error = Err(LitError::LexerError);
|
||||
}
|
||||
}
|
||||
});
|
||||
error?;
|
||||
buf.push(0);
|
||||
LitKind::CStr(buf.into(), StrStyle::Cooked)
|
||||
}
|
||||
token::CStrRaw(n) => {
|
||||
let s = symbol.as_str();
|
||||
let mut buf = Vec::with_capacity(s.len());
|
||||
let mut error = Ok(());
|
||||
unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c {
|
||||
Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
|
||||
error = Err(LitError::NulInCStr(span));
|
||||
}
|
||||
Ok(CStrUnit::Byte(b)) => buf.push(b),
|
||||
Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
|
||||
Ok(CStrUnit::Char(c)) => {
|
||||
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
|
||||
}
|
||||
Err(err) => {
|
||||
if err.is_fatal() {
|
||||
error = Err(LitError::LexerError);
|
||||
}
|
||||
}
|
||||
});
|
||||
error?;
|
||||
buf.push(0);
|
||||
LitKind::CStr(buf.into(), StrStyle::Raw(n))
|
||||
}
|
||||
token::Err => LitKind::Err,
|
||||
})
|
||||
}
|
||||
|
@ -191,6 +242,14 @@ impl fmt::Display for LitKind {
|
|||
string = symbol
|
||||
)?;
|
||||
}
|
||||
LitKind::CStr(ref bytes, StrStyle::Cooked) => {
|
||||
write!(f, "c\"{}\"", escape_byte_str_symbol(bytes))?
|
||||
}
|
||||
LitKind::CStr(ref bytes, StrStyle::Raw(n)) => {
|
||||
// This can only be valid UTF-8.
|
||||
let symbol = str::from_utf8(bytes).unwrap();
|
||||
write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize),)?;
|
||||
}
|
||||
LitKind::Int(n, ty) => {
|
||||
write!(f, "{n}")?;
|
||||
match ty {
|
||||
|
@ -237,6 +296,8 @@ impl MetaItemLit {
|
|||
LitKind::Str(_, ast::StrStyle::Raw(n)) => token::StrRaw(n),
|
||||
LitKind::ByteStr(_, ast::StrStyle::Cooked) => token::ByteStr,
|
||||
LitKind::ByteStr(_, ast::StrStyle::Raw(n)) => token::ByteStrRaw(n),
|
||||
LitKind::CStr(_, ast::StrStyle::Cooked) => token::CStr,
|
||||
LitKind::CStr(_, ast::StrStyle::Raw(n)) => token::CStrRaw(n),
|
||||
LitKind::Byte(_) => token::Byte,
|
||||
LitKind::Char(_) => token::Char,
|
||||
LitKind::Int(..) => token::Integer,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue