1
Fork 0

Add StrStyle to ast::LitKind::ByteStr.

This is required to distinguish between cooked and raw byte string
literals in an `ast::LitKind`, without referring to an adjacent
`token::Lit`. It's a prerequisite for the next commit.
This commit is contained in:
Nicholas Nethercote 2022-11-29 13:35:44 +11:00
parent e658144586
commit a7f35c42d4
18 changed files with 39 additions and 28 deletions

View file

@ -1796,8 +1796,9 @@ pub enum LitKind {
/// A string literal (`"foo"`). The symbol is unescaped, and so may differ /// A string literal (`"foo"`). The symbol is unescaped, and so may differ
/// from the original token's symbol. /// from the original token's symbol.
Str(Symbol, StrStyle), Str(Symbol, StrStyle),
/// A byte string (`b"foo"`). /// A byte string (`b"foo"`). Not stored as a symbol because it might be
ByteStr(Lrc<[u8]>), /// non-utf8, and symbols only allow utf8 strings.
ByteStr(Lrc<[u8]>, StrStyle),
/// A byte char (`b'f'`). /// A byte char (`b'f'`).
Byte(u8), Byte(u8),
/// A character literal (`'a'`). /// A character literal (`'a'`).
@ -1822,7 +1823,7 @@ impl LitKind {
/// Returns `true` if this literal is a byte string literal. /// Returns `true` if this literal is a byte string literal.
pub fn is_bytestr(&self) -> bool { pub fn is_bytestr(&self) -> bool {
matches!(self, LitKind::ByteStr(_)) matches!(self, LitKind::ByteStr(..))
} }
/// Returns `true` if this is a numeric literal. /// Returns `true` if this is a numeric literal.

View file

@ -1,11 +1,12 @@
//! Code related to parsing literals. //! Code related to parsing literals.
use crate::ast::{self, LitKind, MetaItemLit}; use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
use crate::token::{self, Token}; use crate::token::{self, Token};
use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode}; use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode};
use rustc_span::symbol::{kw, sym, Symbol}; use rustc_span::symbol::{kw, sym, Symbol};
use rustc_span::Span; use rustc_span::Span;
use std::ascii; use std::ascii;
use std::str;
#[derive(Debug)] #[derive(Debug)]
pub enum LitError { pub enum LitError {
@ -115,9 +116,9 @@ impl LitKind {
} }
}); });
error?; error?;
LitKind::ByteStr(buf.into()) LitKind::ByteStr(buf.into(), StrStyle::Cooked)
} }
token::ByteStrRaw(_) => { token::ByteStrRaw(n) => {
let s = symbol.as_str(); let s = symbol.as_str();
let bytes = if s.contains('\r') { let bytes = if s.contains('\r') {
let mut buf = Vec::with_capacity(s.len()); let mut buf = Vec::with_capacity(s.len());
@ -136,7 +137,7 @@ impl LitKind {
symbol.to_string().into_bytes() symbol.to_string().into_bytes()
}; };
LitKind::ByteStr(bytes.into()) LitKind::ByteStr(bytes.into(), StrStyle::Raw(n))
} }
token::Err => LitKind::Err, token::Err => LitKind::Err,
}) })
@ -155,10 +156,15 @@ impl LitKind {
(token::Str, symbol, None) (token::Str, symbol, None)
} }
LitKind::Str(symbol, ast::StrStyle::Raw(n)) => (token::StrRaw(n), symbol, None), LitKind::Str(symbol, ast::StrStyle::Raw(n)) => (token::StrRaw(n), symbol, None),
LitKind::ByteStr(ref bytes) => { LitKind::ByteStr(ref bytes, ast::StrStyle::Cooked) => {
let string = bytes.escape_ascii().to_string(); let string = bytes.escape_ascii().to_string();
(token::ByteStr, Symbol::intern(&string), None) (token::ByteStr, Symbol::intern(&string), None)
} }
LitKind::ByteStr(ref bytes, ast::StrStyle::Raw(n)) => {
// Unwrap because raw byte string literals can only contain ASCII.
let string = str::from_utf8(bytes).unwrap();
(token::ByteStrRaw(n), Symbol::intern(&string), None)
}
LitKind::Byte(byte) => { LitKind::Byte(byte) => {
let string: String = ascii::escape_default(byte).map(Into::<char>::into).collect(); let string: String = ascii::escape_default(byte).map(Into::<char>::into).collect();
(token::Byte, Symbol::intern(&string), None) (token::Byte, Symbol::intern(&string), None)

View file

@ -97,7 +97,7 @@ impl<'hir> LoweringContext<'_, 'hir> {
} }
ExprKind::IncludedBytes(bytes) => hir::ExprKind::Lit(respan( ExprKind::IncludedBytes(bytes) => hir::ExprKind::Lit(respan(
self.lower_span(e.span), self.lower_span(e.span),
LitKind::ByteStr(bytes.clone()), LitKind::ByteStr(bytes.clone(), StrStyle::Cooked),
)), )),
ExprKind::Cast(expr, ty) => { ExprKind::Cast(expr, ty) => {
let expr = self.lower_expr(expr); let expr = self.lower_expr(expr);

View file

@ -323,7 +323,8 @@ impl<'a> State<'a> {
self.print_token_literal(*token_lit, expr.span); self.print_token_literal(*token_lit, expr.span);
} }
ast::ExprKind::IncludedBytes(bytes) => { ast::ExprKind::IncludedBytes(bytes) => {
let lit = ast::LitKind::ByteStr(bytes.clone()).synthesize_token_lit(); let lit = ast::LitKind::ByteStr(bytes.clone(), ast::StrStyle::Cooked)
.synthesize_token_lit();
self.print_token_literal(lit, expr.span) self.print_token_literal(lit, expr.span)
} }
ast::ExprKind::Cast(expr, ty) => { ast::ExprKind::Cast(expr, ty) => {

View file

@ -69,7 +69,7 @@ fn invalid_type_err(
Ok(ast::LitKind::Int(_, _)) => { Ok(ast::LitKind::Int(_, _)) => {
cx.span_err(span, "numeric literal is not a `u8`"); cx.span_err(span, "numeric literal is not a `u8`");
} }
Ok(ast::LitKind::ByteStr(_) | ast::LitKind::Byte(_)) => unreachable!(), Ok(ast::LitKind::ByteStr(..) | ast::LitKind::Byte(_)) => unreachable!(),
Err(err) => { Err(err) => {
report_lit_error(&cx.sess.parse_sess, err, token_lit, span); report_lit_error(&cx.sess.parse_sess, err, token_lit, span);
} }
@ -97,7 +97,7 @@ fn handle_array_element(
)) if val <= u8::MAX.into() => Some(val as u8), )) if val <= u8::MAX.into() => Some(val as u8),
Ok(ast::LitKind::Byte(val)) => Some(val), Ok(ast::LitKind::Byte(val)) => Some(val),
Ok(ast::LitKind::ByteStr(_)) => { Ok(ast::LitKind::ByteStr(..)) => {
if !*has_errors { if !*has_errors {
cx.struct_span_err(expr.span, "cannot concatenate doubly nested array") cx.struct_span_err(expr.span, "cannot concatenate doubly nested array")
.note("byte strings are treated as arrays of bytes") .note("byte strings are treated as arrays of bytes")
@ -174,7 +174,7 @@ pub fn expand_concat_bytes(
Ok(ast::LitKind::Byte(val)) => { Ok(ast::LitKind::Byte(val)) => {
accumulator.push(val); accumulator.push(val);
} }
Ok(ast::LitKind::ByteStr(ref bytes)) => { Ok(ast::LitKind::ByteStr(ref bytes, _)) => {
accumulator.extend_from_slice(&bytes); accumulator.extend_from_slice(&bytes);
} }
_ => { _ => {

View file

@ -1234,7 +1234,7 @@ pub fn expr_to_spanned_string<'a>(
Err(match expr.kind { Err(match expr.kind {
ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) { ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) {
Ok(ast::LitKind::Str(s, style)) => return Ok((s, style, expr.span)), Ok(ast::LitKind::Str(s, style)) => return Ok((s, style, expr.span)),
Ok(ast::LitKind::ByteStr(_)) => { Ok(ast::LitKind::ByteStr(..)) => {
let mut err = cx.struct_span_err(expr.span, err_msg); let mut err = cx.struct_span_err(expr.span, err_msg);
let span = expr.span.shrink_to_lo(); let span = expr.span.shrink_to_lo();
err.span_suggestion( err.span_suggestion(

View file

@ -361,7 +361,7 @@ impl<'a> ExtCtxt<'a> {
} }
pub fn expr_byte_str(&self, sp: Span, bytes: Vec<u8>) -> P<ast::Expr> { pub fn expr_byte_str(&self, sp: Span, bytes: Vec<u8>) -> P<ast::Expr> {
self.expr_lit(sp, ast::LitKind::ByteStr(Lrc::from(bytes))) self.expr_lit(sp, ast::LitKind::ByteStr(Lrc::from(bytes), ast::StrStyle::Cooked))
} }
/// `[expr1, expr2, ...]` /// `[expr1, expr2, ...]`

View file

@ -526,7 +526,8 @@ impl server::TokenStream for Rustc<'_, '_> {
Ok(tokenstream::TokenStream::token_alone(token::Literal(*token_lit), expr.span)) Ok(tokenstream::TokenStream::token_alone(token::Literal(*token_lit), expr.span))
} }
ast::ExprKind::IncludedBytes(bytes) => { ast::ExprKind::IncludedBytes(bytes) => {
let lit = ast::LitKind::ByteStr(bytes.clone()).synthesize_token_lit(); let lit = ast::LitKind::ByteStr(bytes.clone(), ast::StrStyle::Cooked)
.synthesize_token_lit();
Ok(tokenstream::TokenStream::token_alone(token::TokenKind::Literal(lit), expr.span)) Ok(tokenstream::TokenStream::token_alone(token::TokenKind::Literal(lit), expr.span))
} }
ast::ExprKind::Unary(ast::UnOp::Neg, e) => match &e.kind { ast::ExprKind::Unary(ast::UnOp::Neg, e) => match &e.kind {

View file

@ -1169,7 +1169,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
match lit.node { match lit.node {
ast::LitKind::Str(..) => tcx.mk_static_str(), ast::LitKind::Str(..) => tcx.mk_static_str(),
ast::LitKind::ByteStr(ref v) => { ast::LitKind::ByteStr(ref v, _) => {
tcx.mk_imm_ref(tcx.lifetimes.re_static, tcx.mk_array(tcx.types.u8, v.len() as u64)) tcx.mk_imm_ref(tcx.lifetimes.re_static, tcx.mk_array(tcx.types.u8, v.len() as u64))
} }
ast::LitKind::Byte(_) => tcx.types.u8, ast::LitKind::Byte(_) => tcx.types.u8,

View file

@ -386,7 +386,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
// Byte string patterns behave the same way as array patterns // Byte string patterns behave the same way as array patterns
// They can denote both statically and dynamically-sized byte arrays. // They can denote both statically and dynamically-sized byte arrays.
let mut pat_ty = ty; let mut pat_ty = ty;
if let hir::ExprKind::Lit(Spanned { node: ast::LitKind::ByteStr(_), .. }) = lt.kind { if let hir::ExprKind::Lit(Spanned { node: ast::LitKind::ByteStr(..), .. }) = lt.kind {
let expected = self.structurally_resolved_type(span, expected); let expected = self.structurally_resolved_type(span, expected);
if let ty::Ref(_, inner_ty, _) = expected.kind() if let ty::Ref(_, inner_ty, _) = expected.kind()
&& matches!(inner_ty.kind(), ty::Slice(_)) && matches!(inner_ty.kind(), ty::Slice(_))

View file

@ -135,14 +135,14 @@ pub(crate) fn lit_to_mir_constant<'tcx>(
let allocation = tcx.intern_const_alloc(allocation); let allocation = tcx.intern_const_alloc(allocation);
ConstValue::Slice { data: allocation, start: 0, end: s.len() } ConstValue::Slice { data: allocation, start: 0, end: s.len() }
} }
(ast::LitKind::ByteStr(data), ty::Ref(_, inner_ty, _)) (ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _))
if matches!(inner_ty.kind(), ty::Slice(_)) => if matches!(inner_ty.kind(), ty::Slice(_)) =>
{ {
let allocation = Allocation::from_bytes_byte_aligned_immutable(data as &[u8]); let allocation = Allocation::from_bytes_byte_aligned_immutable(data as &[u8]);
let allocation = tcx.intern_const_alloc(allocation); let allocation = tcx.intern_const_alloc(allocation);
ConstValue::Slice { data: allocation, start: 0, end: data.len() } ConstValue::Slice { data: allocation, start: 0, end: data.len() }
} }
(ast::LitKind::ByteStr(data), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => { (ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => {
let id = tcx.allocate_bytes(data); let id = tcx.allocate_bytes(data);
ConstValue::Scalar(Scalar::from_pointer(id.into(), &tcx)) ConstValue::Scalar(Scalar::from_pointer(id.into(), &tcx))
} }

View file

@ -33,13 +33,13 @@ pub(crate) fn lit_to_const<'tcx>(
let str_bytes = s.as_str().as_bytes(); let str_bytes = s.as_str().as_bytes();
ty::ValTree::from_raw_bytes(tcx, str_bytes) ty::ValTree::from_raw_bytes(tcx, str_bytes)
} }
(ast::LitKind::ByteStr(data), ty::Ref(_, inner_ty, _)) (ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _))
if matches!(inner_ty.kind(), ty::Slice(_)) => if matches!(inner_ty.kind(), ty::Slice(_)) =>
{ {
let bytes = data as &[u8]; let bytes = data as &[u8];
ty::ValTree::from_raw_bytes(tcx, bytes) ty::ValTree::from_raw_bytes(tcx, bytes)
} }
(ast::LitKind::ByteStr(data), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => { (ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => {
let bytes = data as &[u8]; let bytes = data as &[u8];
ty::ValTree::from_raw_bytes(tcx, bytes) ty::ValTree::from_raw_bytes(tcx, bytes)
} }

View file

@ -33,7 +33,7 @@ impl<'tcx> LateLintPass<'tcx> for InvalidUtf8InUnchecked {
if let Some([arg]) = match_function_call(cx, expr, &paths::STR_FROM_UTF8_UNCHECKED) { if let Some([arg]) = match_function_call(cx, expr, &paths::STR_FROM_UTF8_UNCHECKED) {
match &arg.kind { match &arg.kind {
ExprKind::Lit(Spanned { node: lit, .. }) => { ExprKind::Lit(Spanned { node: lit, .. }) => {
if let LitKind::ByteStr(bytes) = &lit if let LitKind::ByteStr(bytes, _) = &lit
&& std::str::from_utf8(bytes).is_err() && std::str::from_utf8(bytes).is_err()
{ {
lint(cx, expr.span); lint(cx, expr.span);

View file

@ -60,7 +60,7 @@ impl LateLintPass<'_> for LargeIncludeFile {
then { then {
let len = match &lit.node { let len = match &lit.node {
// include_bytes // include_bytes
LitKind::ByteStr(bstr) => bstr.len(), LitKind::ByteStr(bstr, _) => bstr.len(),
// include_str // include_str
LitKind::Str(sym, _) => sym.as_str().len(), LitKind::Str(sym, _) => sym.as_str().len(),
_ => return, _ => return,

View file

@ -282,7 +282,7 @@ impl<'a> NormalizedPat<'a> {
// TODO: Handle negative integers. They're currently treated as a wild match. // TODO: Handle negative integers. They're currently treated as a wild match.
ExprKind::Lit(lit) => match lit.node { ExprKind::Lit(lit) => match lit.node {
LitKind::Str(sym, _) => Self::LitStr(sym), LitKind::Str(sym, _) => Self::LitStr(sym),
LitKind::ByteStr(ref bytes) => Self::LitBytes(bytes), LitKind::ByteStr(ref bytes, _) => Self::LitBytes(bytes),
LitKind::Byte(val) => Self::LitInt(val.into()), LitKind::Byte(val) => Self::LitInt(val.into()),
LitKind::Char(val) => Self::LitInt(val.into()), LitKind::Char(val) => Self::LitInt(val.into()),
LitKind::Int(val, _) => Self::LitInt(val), LitKind::Int(val, _) => Self::LitInt(val),

View file

@ -299,7 +299,7 @@ impl<'a, 'tcx> PrintVisitor<'a, 'tcx> {
}; };
kind!("Float(_, {float_ty})"); kind!("Float(_, {float_ty})");
}, },
LitKind::ByteStr(ref vec) => { LitKind::ByteStr(ref vec, _) => {
bind!(self, vec); bind!(self, vec);
kind!("ByteStr(ref {vec})"); kind!("ByteStr(ref {vec})");
chain!(self, "let [{:?}] = **{vec}", vec.value); chain!(self, "let [{:?}] = **{vec}", vec.value);

View file

@ -69,7 +69,9 @@ fn lit_search_pat(lit: &LitKind) -> (Pat, Pat) {
LitKind::Str(_, StrStyle::Cooked) => (Pat::Str("\""), Pat::Str("\"")), LitKind::Str(_, StrStyle::Cooked) => (Pat::Str("\""), Pat::Str("\"")),
LitKind::Str(_, StrStyle::Raw(0)) => (Pat::Str("r"), Pat::Str("\"")), LitKind::Str(_, StrStyle::Raw(0)) => (Pat::Str("r"), Pat::Str("\"")),
LitKind::Str(_, StrStyle::Raw(_)) => (Pat::Str("r#"), Pat::Str("#")), LitKind::Str(_, StrStyle::Raw(_)) => (Pat::Str("r#"), Pat::Str("#")),
LitKind::ByteStr(_) => (Pat::Str("b\""), Pat::Str("\"")), LitKind::ByteStr(_, StrStyle::Cooked) => (Pat::Str("b\""), Pat::Str("\"")),
LitKind::ByteStr(_, StrStyle::Raw(0)) => (Pat::Str("br\""), Pat::Str("\"")),
LitKind::ByteStr(_, StrStyle::Raw(_)) => (Pat::Str("br#\""), Pat::Str("#")),
LitKind::Byte(_) => (Pat::Str("b'"), Pat::Str("'")), LitKind::Byte(_) => (Pat::Str("b'"), Pat::Str("'")),
LitKind::Char(_) => (Pat::Str("'"), Pat::Str("'")), LitKind::Char(_) => (Pat::Str("'"), Pat::Str("'")),
LitKind::Int(_, LitIntType::Signed(IntTy::Isize)) => (Pat::Num, Pat::Str("isize")), LitKind::Int(_, LitIntType::Signed(IntTy::Isize)) => (Pat::Num, Pat::Str("isize")),

View file

@ -210,7 +210,7 @@ pub fn lit_to_mir_constant(lit: &LitKind, ty: Option<Ty<'_>>) -> Constant {
match *lit { match *lit {
LitKind::Str(ref is, _) => Constant::Str(is.to_string()), LitKind::Str(ref is, _) => Constant::Str(is.to_string()),
LitKind::Byte(b) => Constant::Int(u128::from(b)), LitKind::Byte(b) => Constant::Int(u128::from(b)),
LitKind::ByteStr(ref s) => Constant::Binary(Lrc::clone(s)), LitKind::ByteStr(ref s, _) => Constant::Binary(Lrc::clone(s)),
LitKind::Char(c) => Constant::Char(c), LitKind::Char(c) => Constant::Char(c),
LitKind::Int(n, _) => Constant::Int(n), LitKind::Int(n, _) => Constant::Int(n),
LitKind::Float(ref is, LitFloatType::Suffixed(fty)) => match fty { LitKind::Float(ref is, LitFloatType::Suffixed(fty)) => match fty {