1
Fork 0

Extract unescape from rustc_lexer into its own crate

This commit is contained in:
Guillaume Gomez 2025-01-31 15:02:41 +01:00
parent c03c38d5c2
commit 49d2d5a116
14 changed files with 36 additions and 12 deletions

View file

@ -2151,6 +2151,10 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
[[package]]
name = "literal-escaper"
version = "0.0.0"
[[package]] [[package]]
name = "lld-wrapper" name = "lld-wrapper"
version = "0.1.0" version = "0.1.0"
@ -3366,6 +3370,7 @@ name = "rustc_ast"
version = "0.0.0" version = "0.0.0"
dependencies = [ dependencies = [
"bitflags", "bitflags",
"literal-escaper",
"memchr", "memchr",
"rustc_ast_ir", "rustc_ast_ir",
"rustc_data_structures", "rustc_data_structures",
@ -4325,6 +4330,7 @@ name = "rustc_parse"
version = "0.0.0" version = "0.0.0"
dependencies = [ dependencies = [
"bitflags", "bitflags",
"literal-escaper",
"rustc_ast", "rustc_ast",
"rustc_ast_pretty", "rustc_ast_pretty",
"rustc_data_structures", "rustc_data_structures",
@ -4347,6 +4353,7 @@ dependencies = [
name = "rustc_parse_format" name = "rustc_parse_format"
version = "0.0.0" version = "0.0.0"
dependencies = [ dependencies = [
"literal-escaper",
"rustc_index", "rustc_index",
"rustc_lexer", "rustc_lexer",
] ]

View file

@ -6,6 +6,7 @@ edition = "2021"
[dependencies] [dependencies]
# tidy-alphabetical-start # tidy-alphabetical-start
bitflags = "2.4.1" bitflags = "2.4.1"
literal-escaper = { path = "../../library/literal-escaper" }
memchr = "2.7.4" memchr = "2.7.4"
rustc_ast_ir = { path = "../rustc_ast_ir" } rustc_ast_ir = { path = "../rustc_ast_ir" }
rustc_data_structures = { path = "../rustc_data_structures" } rustc_data_structures = { path = "../rustc_data_structures" }

View file

@ -2,7 +2,7 @@
use std::{ascii, fmt, str}; use std::{ascii, fmt, str};
use rustc_lexer::unescape::{ use literal_escaper::{
MixedUnit, Mode, byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode, MixedUnit, Mode, byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode,
}; };
use rustc_span::{Span, Symbol, kw, sym}; use rustc_span::{Span, Symbol, kw, sym};

View file

@ -27,7 +27,6 @@
// tidy-alphabetical-end // tidy-alphabetical-end
mod cursor; mod cursor;
pub mod unescape;
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;

View file

@ -6,6 +6,7 @@ edition = "2021"
[dependencies] [dependencies]
# tidy-alphabetical-start # tidy-alphabetical-start
bitflags = "2.4.1" bitflags = "2.4.1"
literal-escaper = { path = "../../library/literal-escaper" }
rustc_ast = { path = "../rustc_ast" } rustc_ast = { path = "../rustc_ast" }
rustc_ast_pretty = { path = "../rustc_ast_pretty" } rustc_ast_pretty = { path = "../rustc_ast_pretty" }
rustc_data_structures = { path = "../rustc_data_structures" } rustc_data_structures = { path = "../rustc_data_structures" }

View file

@ -1,12 +1,12 @@
use std::ops::Range; use std::ops::Range;
use literal_escaper::{self, EscapeError, Mode};
use rustc_ast::ast::{self, AttrStyle}; use rustc_ast::ast::{self, AttrStyle};
use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind}; use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind};
use rustc_ast::tokenstream::TokenStream; use rustc_ast::tokenstream::TokenStream;
use rustc_ast::util::unicode::contains_text_flow_control_chars; use rustc_ast::util::unicode::contains_text_flow_control_chars;
use rustc_errors::codes::*; use rustc_errors::codes::*;
use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey}; use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey};
use rustc_lexer::unescape::{self, EscapeError, Mode};
use rustc_lexer::{Base, Cursor, DocStyle, LiteralKind, RawStrError}; use rustc_lexer::{Base, Cursor, DocStyle, LiteralKind, RawStrError};
use rustc_session::lint::BuiltinLintDiag; use rustc_session::lint::BuiltinLintDiag;
use rustc_session::lint::builtin::{ use rustc_session::lint::builtin::{
@ -970,7 +970,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
postfix_len: u32, postfix_len: u32,
) -> (token::LitKind, Symbol) { ) -> (token::LitKind, Symbol) {
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
unescape::unescape_unicode(src, mode, &mut |span, result| { literal_escaper::unescape_unicode(src, mode, &mut |span, result| {
callback(span, result.map(drop)) callback(span, result.map(drop))
}) })
}) })
@ -986,7 +986,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
postfix_len: u32, postfix_len: u32,
) -> (token::LitKind, Symbol) { ) -> (token::LitKind, Symbol) {
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
unescape::unescape_mixed(src, mode, &mut |span, result| { literal_escaper::unescape_mixed(src, mode, &mut |span, result| {
callback(span, result.map(drop)) callback(span, result.map(drop))
}) })
}) })

View file

@ -3,8 +3,8 @@
use std::iter::once; use std::iter::once;
use std::ops::Range; use std::ops::Range;
use literal_escaper::{EscapeError, Mode};
use rustc_errors::{Applicability, DiagCtxtHandle, ErrorGuaranteed}; use rustc_errors::{Applicability, DiagCtxtHandle, ErrorGuaranteed};
use rustc_lexer::unescape::{EscapeError, Mode};
use rustc_span::{BytePos, Span}; use rustc_span::{BytePos, Span};
use tracing::debug; use tracing::debug;

View file

@ -6,6 +6,7 @@ use core::ops::{Bound, ControlFlow};
use ast::mut_visit::{self, MutVisitor}; use ast::mut_visit::{self, MutVisitor};
use ast::token::IdentIsRaw; use ast::token::IdentIsRaw;
use ast::{CoroutineKind, ForLoopKind, GenBlockKind, MatchKind, Pat, Path, PathSegment, Recovered}; use ast::{CoroutineKind, ForLoopKind, GenBlockKind, MatchKind, Pat, Path, PathSegment, Recovered};
use literal_escaper::unescape_char;
use rustc_ast::ptr::P; use rustc_ast::ptr::P;
use rustc_ast::token::{self, Delimiter, Token, TokenKind}; use rustc_ast::token::{self, Delimiter, Token, TokenKind};
use rustc_ast::tokenstream::TokenTree; use rustc_ast::tokenstream::TokenTree;
@ -21,7 +22,6 @@ use rustc_ast::{
use rustc_ast_pretty::pprust; use rustc_ast_pretty::pprust;
use rustc_data_structures::stack::ensure_sufficient_stack; use rustc_data_structures::stack::ensure_sufficient_stack;
use rustc_errors::{Applicability, Diag, PResult, StashKey, Subdiagnostic}; use rustc_errors::{Applicability, Diag, PResult, StashKey, Subdiagnostic};
use rustc_lexer::unescape::unescape_char;
use rustc_macros::Subdiagnostic; use rustc_macros::Subdiagnostic;
use rustc_session::errors::{ExprParenthesesNeeded, report_lit_error}; use rustc_session::errors::{ExprParenthesesNeeded, report_lit_error};
use rustc_session::lint::BuiltinLintDiag; use rustc_session::lint::BuiltinLintDiag;

View file

@ -5,6 +5,7 @@ edition = "2021"
[dependencies] [dependencies]
# tidy-alphabetical-start # tidy-alphabetical-start
literal-escaper = { path = "../../library/literal-escaper" }
rustc_index = { path = "../rustc_index", default-features = false } rustc_index = { path = "../rustc_index", default-features = false }
rustc_lexer = { path = "../rustc_lexer" } rustc_lexer = { path = "../rustc_lexer" }
# tidy-alphabetical-end # tidy-alphabetical-end

View file

@ -19,7 +19,6 @@
pub use Alignment::*; pub use Alignment::*;
pub use Count::*; pub use Count::*;
pub use Position::*; pub use Position::*;
use rustc_lexer::unescape;
// Note: copied from rustc_span // Note: copied from rustc_span
/// Range inside of a `Span` used for diagnostics when we only have access to relative positions. /// Range inside of a `Span` used for diagnostics when we only have access to relative positions.
@ -1095,12 +1094,14 @@ fn find_width_map_from_snippet(
fn unescape_string(string: &str) -> Option<String> { fn unescape_string(string: &str) -> Option<String> {
let mut buf = String::new(); let mut buf = String::new();
let mut ok = true; let mut ok = true;
unescape::unescape_unicode(string, unescape::Mode::Str, &mut |_, unescaped_char| { literal_escaper::unescape_unicode(
match unescaped_char { string,
literal_escaper::Mode::Str,
&mut |_, unescaped_char| match unescaped_char {
Ok(c) => buf.push(c), Ok(c) => buf.push(c),
Err(_) => ok = false, Err(_) => ok = false,
} },
}); );
ok.then_some(buf) ok.then_some(buf)
} }

View file

@ -0,0 +1,10 @@
[package]
name = "literal-escaper"
version = "0.0.0"
edition = "2021"
[dependencies]
std = { version = '1.0.0', optional = true, package = 'rustc-std-workspace-std' }
[features]
rustc-dep-of-std = ["dep:std"]

View file

@ -0,0 +1,4 @@
# literal-escaper
This crate provides code to unescape string literals. It is used by the compiler
(`rustc_ast`, `rustc_parse` and `rustc_parse_format`) and `proc-macro`.