diff --git a/Cargo.lock b/Cargo.lock index 63441814d4a..06c13942957 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2153,7 +2153,10 @@ checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" [[package]] name = "literal-escaper" -version = "0.0.1" +version = "0.0.0" +dependencies = [ + "rustc-std-workspace-std 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", +] [[package]] name = "lld-wrapper" @@ -3332,6 +3335,12 @@ version = "1.0.1" name = "rustc-std-workspace-std" version = "1.0.1" +[[package]] +name = "rustc-std-workspace-std" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aba676a20abe46e5b0f1b0deae474aaaf31407e6c71147159890574599da04ef" + [[package]] name = "rustc_abi" version = "0.0.0" diff --git a/library/Cargo.lock b/library/Cargo.lock index 8b78908e6d7..f769fb2e1e1 100644 --- a/library/Cargo.lock +++ b/library/Cargo.lock @@ -158,6 +158,13 @@ dependencies = [ "rustc-std-workspace-core", ] +[[package]] +name = "literal-escaper" +version = "0.0.0" +dependencies = [ + "rustc-std-workspace-std", +] + [[package]] name = "memchr" version = "2.7.4" @@ -220,6 +227,7 @@ name = "proc_macro" version = "0.0.0" dependencies = [ "core", + "literal-escaper", "std", ] diff --git a/library/proc_macro/Cargo.toml b/library/proc_macro/Cargo.toml index e54a50aa15c..e5c90309f16 100644 --- a/library/proc_macro/Cargo.toml +++ b/library/proc_macro/Cargo.toml @@ -4,6 +4,7 @@ version = "0.0.0" edition = "2021" [dependencies] +literal-escaper = { path = "../literal-escaper", features = ["rustc-dep-of-std"] } std = { path = "../std" } # Workaround: when documenting this crate rustdoc will try to load crate named # `core` when resolving doc links. Without this line a different `core` will be diff --git a/library/proc_macro/src/lib.rs b/library/proc_macro/src/lib.rs index 6611ce30a1b..57dd47f1060 100644 --- a/library/proc_macro/src/lib.rs +++ b/library/proc_macro/src/lib.rs @@ -28,6 +28,7 @@ #![feature(restricted_std)] #![feature(rustc_attrs)] #![feature(extend_one)] +#![feature(stmt_expr_attributes)] #![recursion_limit = "256"] #![allow(internal_features)] #![deny(ffi_unwind_calls)] @@ -50,11 +51,23 @@ use std::{error, fmt}; #[unstable(feature = "proc_macro_diagnostic", issue = "54140")] pub use diagnostic::{Diagnostic, Level, MultiSpan}; +#[unstable(feature = "proc_macro_value", issue = "136652")] +pub use literal_escaper::EscapeError; +use literal_escaper::{MixedUnit, Mode, byte_from_char, unescape_mixed, unescape_unicode}; #[unstable(feature = "proc_macro_totokens", issue = "130977")] pub use to_tokens::ToTokens; use crate::escape::{EscapeOptions, escape_bytes}; +/// Errors returned when trying to retrieve a literal unescaped value. +#[unstable(feature = "proc_macro_value", issue = "136652")] +pub enum ConversionErrorKind { + /// The literal failed to be escaped, take a look at [`EscapeError`] for more information. + FailedToUnescape(EscapeError), + /// Trying to convert a literal with the wrong type. + InvalidLiteralKind, +} + /// Determines whether proc_macro has been made accessible to the currently /// running program. /// @@ -1450,6 +1463,107 @@ impl Literal { } }) } + + /// Returns the unescaped string value if the current literal is a string or a string literal. + #[unstable(feature = "proc_macro_value", issue = "136652")] + pub fn str_value(&self) -> Result { + self.0.symbol.with(|symbol| match self.0.kind { + bridge::LitKind::Str => { + if symbol.contains('\\') { + let mut buf = String::with_capacity(symbol.len()); + let mut error = None; + // Force-inlining here is aggressive but the closure is + // called on every char in the string, so it can be hot in + // programs with many long strings containing escapes. + unescape_unicode( + symbol, + Mode::Str, + &mut #[inline(always)] + |_, c| match c { + Ok(c) => buf.push(c), + Err(err) => { + if err.is_fatal() { + error = Some(ConversionErrorKind::FailedToUnescape(err)); + } + } + }, + ); + if let Some(error) = error { Err(error) } else { Ok(buf) } + } else { + Ok(symbol.to_string()) + } + } + bridge::LitKind::StrRaw(_) => Ok(symbol.to_string()), + _ => Err(ConversionErrorKind::InvalidLiteralKind), + }) + } + + /// Returns the unescaped string value if the current literal is a c-string or a c-string + /// literal. + #[unstable(feature = "proc_macro_value", issue = "136652")] + pub fn cstr_value(&self) -> Result, ConversionErrorKind> { + self.0.symbol.with(|symbol| match self.0.kind { + bridge::LitKind::CStr => { + let mut error = None; + let mut buf = Vec::with_capacity(symbol.len()); + + unescape_mixed(symbol, Mode::CStr, &mut |_span, c| match c { + Ok(MixedUnit::Char(c)) => { + buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) + } + Ok(MixedUnit::HighByte(b)) => buf.push(b), + Err(err) => { + if err.is_fatal() { + error = Some(ConversionErrorKind::FailedToUnescape(err)); + } + } + }); + if let Some(error) = error { + Err(error) + } else { + buf.push(0); + Ok(buf) + } + } + bridge::LitKind::CStrRaw(_) => { + // Raw strings have no escapes so we can convert the symbol + // directly to a `Lrc` after appending the terminating NUL + // char. + let mut buf = symbol.to_owned().into_bytes(); + buf.push(0); + Ok(buf) + } + _ => Err(ConversionErrorKind::InvalidLiteralKind), + }) + } + + /// Returns the unescaped string value if the current literal is a byte string or a byte string + /// literal. + #[unstable(feature = "proc_macro_value", issue = "136652")] + pub fn byte_str_value(&self) -> Result, ConversionErrorKind> { + self.0.symbol.with(|symbol| match self.0.kind { + bridge::LitKind::ByteStr => { + let mut buf = Vec::with_capacity(symbol.len()); + let mut error = None; + + unescape_unicode(symbol, Mode::ByteStr, &mut |_, c| match c { + Ok(c) => buf.push(byte_from_char(c)), + Err(err) => { + if err.is_fatal() { + error = Some(ConversionErrorKind::FailedToUnescape(err)); + } + } + }); + if let Some(error) = error { Err(error) } else { Ok(buf) } + } + bridge::LitKind::ByteStrRaw(_) => { + // Raw strings have no escapes so we can convert the symbol + // directly to a `Lrc`. + Ok(symbol.to_owned().into_bytes()) + } + _ => Err(ConversionErrorKind::InvalidLiteralKind), + }) + } } /// Parse a single literal from its stringified representation.