Extract unescape from rustc_lexer into its own crate

This commit is contained in:
Guillaume Gomez 2025-01-31 15:02:41 +01:00
parent c03c38d5c2
commit 49d2d5a116
14 changed files with 36 additions and 12 deletions

View file

@ -2151,6 +2151,10 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
[[package]]
name = "literal-escaper"
version = "0.0.1"
[[package]]
name = "lld-wrapper"
version = "0.1.0"
@ -3366,6 +3370,7 @@ name = "rustc_ast"
version = "0.0.0"
dependencies = [
"bitflags",
"literal-escaper",
"memchr",
"rustc_ast_ir",
"rustc_data_structures",
@ -4325,6 +4330,7 @@ name = "rustc_parse"
version = "0.0.0"
dependencies = [
"bitflags",
"literal-escaper",
"rustc_ast",
"rustc_ast_pretty",
"rustc_data_structures",
@ -4347,6 +4353,7 @@ dependencies = [
name = "rustc_parse_format"
version = "0.0.0"
dependencies = [
"literal-escaper",
"rustc_index",
"rustc_lexer",
]

View file

@ -6,6 +6,7 @@ edition = "2021"
[dependencies]
# tidy-alphabetical-start
bitflags = "2.4.1"
literal-escaper = { path = "../../library/literal-escaper" }
memchr = "2.7.4"
rustc_ast_ir = { path = "../rustc_ast_ir" }
rustc_data_structures = { path = "../rustc_data_structures" }

View file

@ -2,7 +2,7 @@
use std::{ascii, fmt, str};
use rustc_lexer::unescape::{
use literal_escaper::{
MixedUnit, Mode, byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode,
};
use rustc_span::{Span, Symbol, kw, sym};

View file

@ -27,7 +27,6 @@
// tidy-alphabetical-end
mod cursor;
pub mod unescape;
#[cfg(test)]
mod tests;

View file

@ -6,6 +6,7 @@ edition = "2021"
[dependencies]
# tidy-alphabetical-start
bitflags = "2.4.1"
literal-escaper = { path = "../../library/literal-escaper" }
rustc_ast = { path = "../rustc_ast" }
rustc_ast_pretty = { path = "../rustc_ast_pretty" }
rustc_data_structures = { path = "../rustc_data_structures" }

View file

@ -1,12 +1,12 @@
use std::ops::Range;
use literal_escaper::{self, EscapeError, Mode};
use rustc_ast::ast::{self, AttrStyle};
use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind};
use rustc_ast::tokenstream::TokenStream;
use rustc_ast::util::unicode::contains_text_flow_control_chars;
use rustc_errors::codes::*;
use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey};
use rustc_lexer::unescape::{self, EscapeError, Mode};
use rustc_lexer::{Base, Cursor, DocStyle, LiteralKind, RawStrError};
use rustc_session::lint::BuiltinLintDiag;
use rustc_session::lint::builtin::{
@ -970,7 +970,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
postfix_len: u32,
) -> (token::LitKind, Symbol) {
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
unescape::unescape_unicode(src, mode, &mut |span, result| {
literal_escaper::unescape_unicode(src, mode, &mut |span, result| {
callback(span, result.map(drop))
})
})
@ -986,7 +986,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
postfix_len: u32,
) -> (token::LitKind, Symbol) {
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
unescape::unescape_mixed(src, mode, &mut |span, result| {
literal_escaper::unescape_mixed(src, mode, &mut |span, result| {
callback(span, result.map(drop))
})
})

View file

@ -3,8 +3,8 @@
use std::iter::once;
use std::ops::Range;
use literal_escaper::{EscapeError, Mode};
use rustc_errors::{Applicability, DiagCtxtHandle, ErrorGuaranteed};
use rustc_lexer::unescape::{EscapeError, Mode};
use rustc_span::{BytePos, Span};
use tracing::debug;

View file

@ -6,6 +6,7 @@ use core::ops::{Bound, ControlFlow};
use ast::mut_visit::{self, MutVisitor};
use ast::token::IdentIsRaw;
use ast::{CoroutineKind, ForLoopKind, GenBlockKind, MatchKind, Pat, Path, PathSegment, Recovered};
use literal_escaper::unescape_char;
use rustc_ast::ptr::P;
use rustc_ast::token::{self, Delimiter, Token, TokenKind};
use rustc_ast::tokenstream::TokenTree;
@ -21,7 +22,6 @@ use rustc_ast::{
use rustc_ast_pretty::pprust;
use rustc_data_structures::stack::ensure_sufficient_stack;
use rustc_errors::{Applicability, Diag, PResult, StashKey, Subdiagnostic};
use rustc_lexer::unescape::unescape_char;
use rustc_macros::Subdiagnostic;
use rustc_session::errors::{ExprParenthesesNeeded, report_lit_error};
use rustc_session::lint::BuiltinLintDiag;

View file

@ -5,6 +5,7 @@ edition = "2021"
[dependencies]
# tidy-alphabetical-start
literal-escaper = { path = "../../library/literal-escaper" }
rustc_index = { path = "../rustc_index", default-features = false }
rustc_lexer = { path = "../rustc_lexer" }
# tidy-alphabetical-end

View file

@ -19,7 +19,6 @@
pub use Alignment::*;
pub use Count::*;
pub use Position::*;
use rustc_lexer::unescape;
// Note: copied from rustc_span
/// Range inside of a `Span` used for diagnostics when we only have access to relative positions.
@ -1095,12 +1094,14 @@ fn find_width_map_from_snippet(
fn unescape_string(string: &str) -> Option<String> {
let mut buf = String::new();
let mut ok = true;
unescape::unescape_unicode(string, unescape::Mode::Str, &mut |_, unescaped_char| {
match unescaped_char {
literal_escaper::unescape_unicode(
string,
literal_escaper::Mode::Str,
&mut |_, unescaped_char| match unescaped_char {
Ok(c) => buf.push(c),
Err(_) => ok = false,
}
});
},
);
ok.then_some(buf)
}

View file

@ -0,0 +1,10 @@
[package]
name = "literal-escaper"
version = "0.0.0"
edition = "2021"
[dependencies]
std = { version = '1.0.0', optional = true, package = 'rustc-std-workspace-std' }
[features]
rustc-dep-of-std = ["dep:std"]

View file

@ -0,0 +1,4 @@
# literal-escaper
This crate provides code to unescape string literals. It is used by `rustc_lexer`
and `proc-macro`.