1
Fork 0

Rollup merge of #108801 - fee1-dead-contrib:c-str, r=compiler-errors

Implement RFC 3348, `c"foo"` literals

RFC: https://github.com/rust-lang/rfcs/pull/3348
Tracking issue: #105723
This commit is contained in:
Dylan DPC 2023-05-05 18:40:33 +05:30 committed by GitHub
commit 4891f02cff
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
33 changed files with 503 additions and 156 deletions

View file

@ -1821,6 +1821,8 @@ pub enum LitKind {
/// A byte string (`b"foo"`). Not stored as a symbol because it might be
/// non-utf8, and symbols only allow utf8 strings.
ByteStr(Lrc<[u8]>, StrStyle),
/// A C String (`c"foo"`). Guaranteed to only have `\0` at the end.
CStr(Lrc<[u8]>, StrStyle),
/// A byte char (`b'f'`).
Byte(u8),
/// A character literal (`'a'`).
@ -1875,6 +1877,7 @@ impl LitKind {
// unsuffixed variants
LitKind::Str(..)
| LitKind::ByteStr(..)
| LitKind::CStr(..)
| LitKind::Byte(..)
| LitKind::Char(..)
| LitKind::Int(_, LitIntType::Unsuffixed)

View file

@ -74,6 +74,8 @@ pub enum LitKind {
StrRaw(u8), // raw string delimited by `n` hash symbols
ByteStr,
ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols
CStr,
CStrRaw(u8),
Err,
}
@ -141,6 +143,10 @@ impl fmt::Display for Lit {
delim = "#".repeat(n as usize),
string = symbol
)?,
CStr => write!(f, "c\"{symbol}\"")?,
CStrRaw(n) => {
write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))?
}
Integer | Float | Bool | Err => write!(f, "{symbol}")?,
}
@ -170,6 +176,7 @@ impl LitKind {
Float => "float",
Str | StrRaw(..) => "string",
ByteStr | ByteStrRaw(..) => "byte string",
CStr | CStrRaw(..) => "C string",
Err => "error",
}
}

View file

@ -2,9 +2,13 @@
use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
use crate::token::{self, Token};
use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode};
use rustc_lexer::unescape::{
byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit,
Mode,
};
use rustc_span::symbol::{kw, sym, Symbol};
use rustc_span::Span;
use std::ops::Range;
use std::{ascii, fmt, str};
// Escapes a string, represented as a symbol. Reuses the original symbol,
@ -35,6 +39,7 @@ pub enum LitError {
InvalidFloatSuffix,
NonDecimalFloat(u32),
IntTooLarge(u32),
NulInCStr(Range<usize>),
}
impl LitKind {
@ -158,6 +163,52 @@ impl LitKind {
LitKind::ByteStr(bytes.into(), StrStyle::Raw(n))
}
token::CStr => {
let s = symbol.as_str();
let mut buf = Vec::with_capacity(s.len());
let mut error = Ok(());
unescape_c_string(s, Mode::CStr, &mut |span, c| match c {
Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
error = Err(LitError::NulInCStr(span));
}
Ok(CStrUnit::Byte(b)) => buf.push(b),
Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
Ok(CStrUnit::Char(c)) => {
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
}
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
}
});
error?;
buf.push(0);
LitKind::CStr(buf.into(), StrStyle::Cooked)
}
token::CStrRaw(n) => {
let s = symbol.as_str();
let mut buf = Vec::with_capacity(s.len());
let mut error = Ok(());
unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c {
Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
error = Err(LitError::NulInCStr(span));
}
Ok(CStrUnit::Byte(b)) => buf.push(b),
Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
Ok(CStrUnit::Char(c)) => {
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
}
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
}
});
error?;
buf.push(0);
LitKind::CStr(buf.into(), StrStyle::Raw(n))
}
token::Err => LitKind::Err,
})
}
@ -191,6 +242,14 @@ impl fmt::Display for LitKind {
string = symbol
)?;
}
LitKind::CStr(ref bytes, StrStyle::Cooked) => {
write!(f, "c\"{}\"", escape_byte_str_symbol(bytes))?
}
LitKind::CStr(ref bytes, StrStyle::Raw(n)) => {
// This can only be valid UTF-8.
let symbol = str::from_utf8(bytes).unwrap();
write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize),)?;
}
LitKind::Int(n, ty) => {
write!(f, "{n}")?;
match ty {
@ -237,6 +296,8 @@ impl MetaItemLit {
LitKind::Str(_, ast::StrStyle::Raw(n)) => token::StrRaw(n),
LitKind::ByteStr(_, ast::StrStyle::Cooked) => token::ByteStr,
LitKind::ByteStr(_, ast::StrStyle::Raw(n)) => token::ByteStrRaw(n),
LitKind::CStr(_, ast::StrStyle::Cooked) => token::CStr,
LitKind::CStr(_, ast::StrStyle::Raw(n)) => token::CStrRaw(n),
LitKind::Byte(_) => token::Byte,
LitKind::Char(_) => token::Char,
LitKind::Int(..) => token::Integer,