1
Fork 0

proc_macro: stop using a remote object handle for Ident

Doing this for all unicode identifiers would require a dependency on
`unicode-normalization` and `rustc_lexer`, which is currently not
possible for `proc_macro` due to it being built concurrently with `std`
and `core`. Instead, ASCII identifiers are validated locally, and an RPC
message is used to validate unicode identifiers when needed.

String values are interned on both the server and client when
deserializing, to avoid unnecessary copies and keep Ident cheap to copy and
move. This appears to be important for performance.

The client-side interner is based roughly on the one from rustc_span, and uses
an arena inspired by rustc_arena.

RPC messages passing symbols always include the full value. This could
potentially be optimized in the future if it is revealed to be a
performance bottleneck.

Despite now having a relevant implementation of Display for Ident, ToString is
still specialized, as it is a hot-path for this object.

The symbol infrastructure will also be used for literals in the next
part.
This commit is contained in:
Nika Layzell 2022-06-30 21:05:46 -04:00
parent e0dce6ec8d
commit 491fccfbe3
11 changed files with 441 additions and 114 deletions

View file

@ -11,13 +11,13 @@ use rustc_parse::lexer::nfc_normalize;
use rustc_parse::parse_stream_from_source_str; use rustc_parse::parse_stream_from_source_str;
use rustc_session::parse::ParseSess; use rustc_session::parse::ParseSess;
use rustc_span::def_id::CrateNum; use rustc_span::def_id::CrateNum;
use rustc_span::symbol::{self, kw, sym, Symbol}; use rustc_span::symbol::{self, sym, Symbol};
use rustc_span::{BytePos, FileName, Pos, SourceFile, Span}; use rustc_span::{BytePos, FileName, Pos, SourceFile, Span};
use pm::bridge::{server, DelimSpan, ExpnGlobals, Group, Punct, TokenTree}; use pm::bridge::{server, DelimSpan, ExpnGlobals, Group, Ident, Punct, TokenTree};
use pm::{Delimiter, Level, LineColumn}; use pm::{Delimiter, Level, LineColumn};
use std::ascii;
use std::ops::Bound; use std::ops::Bound;
use std::{ascii, panic};
trait FromInternal<T> { trait FromInternal<T> {
fn from_internal(x: T) -> Self; fn from_internal(x: T) -> Self;
@ -50,7 +50,7 @@ impl ToInternal<token::Delimiter> for Delimiter {
} }
impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)>
for Vec<TokenTree<TokenStream, Span, Ident, Literal>> for Vec<TokenTree<TokenStream, Span, Symbol, Literal>>
{ {
fn from_internal((stream, rustc): (TokenStream, &mut Rustc<'_, '_>)) -> Self { fn from_internal((stream, rustc): (TokenStream, &mut Rustc<'_, '_>)) -> Self {
use rustc_ast::token::*; use rustc_ast::token::*;
@ -135,13 +135,12 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)>
Question => op("?"), Question => op("?"),
SingleQuote => op("'"), SingleQuote => op("'"),
Ident(name, false) if name == kw::DollarCrate => trees.push(TokenTree::Ident(Ident::dollar_crate(span))), Ident(sym, is_raw) => trees.push(TokenTree::Ident(Ident { sym, is_raw, span })),
Ident(name, is_raw) => trees.push(TokenTree::Ident(Ident::new(rustc.sess(), name, is_raw, span))),
Lifetime(name) => { Lifetime(name) => {
let ident = symbol::Ident::new(name, span).without_first_quote(); let ident = symbol::Ident::new(name, span).without_first_quote();
trees.extend([ trees.extend([
TokenTree::Punct(Punct { ch: b'\'', joint: true, span }), TokenTree::Punct(Punct { ch: b'\'', joint: true, span }),
TokenTree::Ident(Ident::new(rustc.sess(), ident.name, false, span)), TokenTree::Ident(Ident { sym: ident.name, is_raw: false, span }),
]); ]);
} }
Literal(lit) => trees.push(TokenTree::Literal(self::Literal { lit, span })), Literal(lit) => trees.push(TokenTree::Literal(self::Literal { lit, span })),
@ -170,7 +169,7 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)>
} }
Interpolated(nt) if let NtIdent(ident, is_raw) = *nt => { Interpolated(nt) if let NtIdent(ident, is_raw) = *nt => {
trees.push(TokenTree::Ident(Ident::new(rustc.sess(), ident.name, is_raw, ident.span))) trees.push(TokenTree::Ident(Ident { sym: ident.name, is_raw, span: ident.span }))
} }
Interpolated(nt) => { Interpolated(nt) => {
@ -200,11 +199,14 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)>
} }
} }
impl ToInternal<TokenStream> for TokenTree<TokenStream, Span, Ident, Literal> { impl ToInternal<TokenStream>
for (TokenTree<TokenStream, Span, Symbol, Literal>, &mut Rustc<'_, '_>)
{
fn to_internal(self) -> TokenStream { fn to_internal(self) -> TokenStream {
use rustc_ast::token::*; use rustc_ast::token::*;
let (ch, joint, span) = match self { let (tree, rustc) = self;
let (ch, joint, span) = match tree {
TokenTree::Punct(Punct { ch, joint, span }) => (ch, joint, span), TokenTree::Punct(Punct { ch, joint, span }) => (ch, joint, span),
TokenTree::Group(Group { delimiter, stream, span: DelimSpan { open, close, .. } }) => { TokenTree::Group(Group { delimiter, stream, span: DelimSpan { open, close, .. } }) => {
return tokenstream::TokenTree::Delimited( return tokenstream::TokenTree::Delimited(
@ -215,6 +217,7 @@ impl ToInternal<TokenStream> for TokenTree<TokenStream, Span, Ident, Literal> {
.into(); .into();
} }
TokenTree::Ident(self::Ident { sym, is_raw, span }) => { TokenTree::Ident(self::Ident { sym, is_raw, span }) => {
rustc.sess().symbol_gallery.insert(sym, span);
return tokenstream::TokenTree::token(Ident(sym, is_raw), span).into(); return tokenstream::TokenTree::token(Ident(sym, is_raw), span).into();
} }
TokenTree::Literal(self::Literal { TokenTree::Literal(self::Literal {
@ -289,33 +292,6 @@ impl ToInternal<rustc_errors::Level> for Level {
pub struct FreeFunctions; pub struct FreeFunctions;
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct Ident {
sym: Symbol,
is_raw: bool,
span: Span,
}
impl Ident {
fn new(sess: &ParseSess, sym: Symbol, is_raw: bool, span: Span) -> Ident {
let sym = nfc_normalize(sym.as_str());
let string = sym.as_str();
if !rustc_lexer::is_ident(string) {
panic!("`{:?}` is not a valid identifier", string)
}
if is_raw && !sym.can_be_raw() {
panic!("`{}` cannot be a raw identifier", string);
}
sess.symbol_gallery.insert(sym, span);
Ident { sym, is_raw, span }
}
fn dollar_crate(span: Span) -> Ident {
// `$crate` is accepted as an ident only if it comes from the compiler.
Ident { sym: kw::DollarCrate, is_raw: false, span }
}
}
// FIXME(eddyb) `Literal` should not expose internal `Debug` impls. // FIXME(eddyb) `Literal` should not expose internal `Debug` impls.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Literal { pub struct Literal {
@ -357,12 +333,12 @@ impl<'a, 'b> Rustc<'a, 'b> {
impl server::Types for Rustc<'_, '_> { impl server::Types for Rustc<'_, '_> {
type FreeFunctions = FreeFunctions; type FreeFunctions = FreeFunctions;
type TokenStream = TokenStream; type TokenStream = TokenStream;
type Ident = Ident;
type Literal = Literal; type Literal = Literal;
type SourceFile = Lrc<SourceFile>; type SourceFile = Lrc<SourceFile>;
type MultiSpan = Vec<Span>; type MultiSpan = Vec<Span>;
type Diagnostic = Diagnostic; type Diagnostic = Diagnostic;
type Span = Span; type Span = Span;
type Symbol = Symbol;
} }
impl server::FreeFunctions for Rustc<'_, '_> { impl server::FreeFunctions for Rustc<'_, '_> {
@ -453,22 +429,22 @@ impl server::TokenStream for Rustc<'_, '_> {
fn from_token_tree( fn from_token_tree(
&mut self, &mut self,
tree: TokenTree<Self::TokenStream, Self::Span, Self::Ident, Self::Literal>, tree: TokenTree<Self::TokenStream, Self::Span, Self::Symbol, Self::Literal>,
) -> Self::TokenStream { ) -> Self::TokenStream {
tree.to_internal() (tree, &mut *self).to_internal()
} }
fn concat_trees( fn concat_trees(
&mut self, &mut self,
base: Option<Self::TokenStream>, base: Option<Self::TokenStream>,
trees: Vec<TokenTree<Self::TokenStream, Self::Span, Self::Ident, Self::Literal>>, trees: Vec<TokenTree<Self::TokenStream, Self::Span, Self::Symbol, Self::Literal>>,
) -> Self::TokenStream { ) -> Self::TokenStream {
let mut builder = tokenstream::TokenStreamBuilder::new(); let mut builder = tokenstream::TokenStreamBuilder::new();
if let Some(base) = base { if let Some(base) = base {
builder.push(base); builder.push(base);
} }
for tree in trees { for tree in trees {
builder.push(tree.to_internal()); builder.push((tree, &mut *self).to_internal());
} }
builder.build() builder.build()
} }
@ -491,25 +467,11 @@ impl server::TokenStream for Rustc<'_, '_> {
fn into_trees( fn into_trees(
&mut self, &mut self,
stream: Self::TokenStream, stream: Self::TokenStream,
) -> Vec<TokenTree<Self::TokenStream, Self::Span, Self::Ident, Self::Literal>> { ) -> Vec<TokenTree<Self::TokenStream, Self::Span, Self::Symbol, Self::Literal>> {
FromInternal::from_internal((stream, self)) FromInternal::from_internal((stream, self))
} }
} }
impl server::Ident for Rustc<'_, '_> {
fn new(&mut self, string: &str, span: Self::Span, is_raw: bool) -> Self::Ident {
Ident::new(self.sess(), Symbol::intern(string), is_raw, span)
}
fn span(&mut self, ident: Self::Ident) -> Self::Span {
ident.span
}
fn with_span(&mut self, ident: Self::Ident, span: Self::Span) -> Self::Ident {
Ident { span, ..ident }
}
}
impl server::Literal for Rustc<'_, '_> { impl server::Literal for Rustc<'_, '_> {
fn from_str(&mut self, s: &str) -> Result<Self::Literal, ()> { fn from_str(&mut self, s: &str) -> Result<Self::Literal, ()> {
let name = FileName::proc_macro_source_code(s); let name = FileName::proc_macro_source_code(s);
@ -812,6 +774,13 @@ impl server::Span for Rustc<'_, '_> {
} }
} }
impl server::Symbol for Rustc<'_, '_> {
fn normalize_and_validate_ident(&mut self, string: &str) -> Result<Self::Symbol, ()> {
let sym = nfc_normalize(string);
if rustc_lexer::is_ident(sym.as_str()) { Ok(sym) } else { Err(()) }
}
}
impl server::Server for Rustc<'_, '_> { impl server::Server for Rustc<'_, '_> {
fn globals(&mut self) -> ExpnGlobals<Self::Span> { fn globals(&mut self) -> ExpnGlobals<Self::Span> {
ExpnGlobals { ExpnGlobals {
@ -820,4 +789,12 @@ impl server::Server for Rustc<'_, '_> {
mixed_site: self.mixed_site, mixed_site: self.mixed_site,
} }
} }
fn intern_symbol(string: &str) -> Self::Symbol {
Symbol::intern(string)
}
fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str)) {
f(&symbol.as_str())
}
} }

View file

@ -0,0 +1,113 @@
//! A minimal arena allocator inspired by `rustc_arena::DroplessArena`.
//!
//! This is unfortunately a minimal re-implementation rather than a dependency
//! as it is difficult to depend on crates from within `proc_macro`, due to it
//! being built at the same time as `std`.
use std::cell::{Cell, RefCell};
use std::cmp;
use std::mem::MaybeUninit;
use std::ops::Range;
use std::ptr;
use std::slice;
use std::str;
// The arenas start with PAGE-sized chunks, and then each new chunk is twice as
// big as its predecessor, up until we reach HUGE_PAGE-sized chunks, whereupon
// we stop growing. This scales well, from arenas that are barely used up to
// arenas that are used for 100s of MiBs. Note also that the chosen sizes match
// the usual sizes of pages and huge pages on Linux.
/// Size in bytes of the first chunk an arena allocates (4 KiB).
const PAGE: usize = 4096;
/// Upper bound in bytes on the doubled chunk size (2 MiB). A single request
/// larger than this still gets a chunk big enough to hold it.
const HUGE_PAGE: usize = 2 * 1024 * 1024;
/// A minimal arena allocator inspired by `rustc_arena::DroplessArena`.
///
/// This is unfortunately a complete re-implementation rather than a dependency
/// as it is difficult to depend on crates from within `proc_macro`, due to it
/// being built at the same time as `std`.
///
/// This arena doesn't have support for allocating anything other than byte
/// slices, as that is all that is necessary.
pub(crate) struct Arena {
/// Lowest address of the free region in the current chunk. Null until the
/// first chunk has been allocated.
start: Cell<*mut MaybeUninit<u8>>,
/// Upper bound of the free region in the current chunk. Allocations are
/// carved off this end, moving it downward towards `start`.
end: Cell<*mut MaybeUninit<u8>>,
/// Every chunk allocated so far. The arena owns them, so their memory lives
/// as long as the arena does.
chunks: RefCell<Vec<Box<[MaybeUninit<u8>]>>>,
}
impl Arena {
/// Creates an empty arena. No chunk is allocated here; `start` and `end` are
/// null, so the first non-empty allocation goes through `grow`.
pub(crate) fn new() -> Self {
Arena {
start: Cell::new(ptr::null_mut()),
end: Cell::new(ptr::null_mut()),
chunks: RefCell::new(Vec::new()),
}
}
/// Add a new chunk with at least `additional` free bytes.
///
/// The new chunk becomes the current one: `start` and `end` are pointed at
/// its bounds, so any space still free in the previous chunk is no longer
/// used for allocations.
#[inline(never)]
#[cold]
fn grow(&self, additional: usize) {
let mut chunks = self.chunks.borrow_mut();
let mut new_cap;
if let Some(last_chunk) = chunks.last_mut() {
// Double the previous chunk's size, clamping the previous length to
// HUGE_PAGE / 2 first so the doubled size never exceeds HUGE_PAGE.
new_cap = last_chunk.len().min(HUGE_PAGE / 2);
new_cap *= 2;
} else {
// Very first chunk of this arena.
new_cap = PAGE;
}
// Also ensure that this chunk can fit `additional`.
new_cap = cmp::max(additional, new_cap);
let mut chunk = Box::new_uninit_slice(new_cap);
let Range { start, end } = chunk.as_mut_ptr_range();
self.start.set(start);
self.end.set(end);
// Moving the box into the vector does not move the heap allocation that
// `start` and `end` point into.
chunks.push(chunk);
}
/// Allocates a byte slice with specified size from the current memory
/// chunk. Returns `None` if there is no free space left to satisfy the
/// request.
///
/// Space is taken from the top of the free region: `end` is lowered by
/// `bytes` and the returned slice starts at the new `end`.
fn alloc_raw_without_grow(&self, bytes: usize) -> Option<&mut [MaybeUninit<u8>]> {
let start = self.start.get().addr();
let old_end = self.end.get();
let end = old_end.addr();
// `checked_sub` also covers the fresh arena, where `end` is null
// (address 0) and any non-zero request underflows.
let new_end = end.checked_sub(bytes)?;
if start <= new_end {
// Rebuild the pointer from `old_end` so it keeps that pointer's provenance.
let new_end = old_end.with_addr(new_end);
self.end.set(new_end);
// SAFETY: `bytes` bytes starting at `new_end` were just reserved.
Some(unsafe { slice::from_raw_parts_mut(new_end, bytes) })
} else {
None
}
}
/// Allocates `bytes` uninitialized bytes, adding a new chunk when the
/// current one cannot satisfy the request.
///
/// A zero-sized request returns an empty slice without touching the arena.
fn alloc_raw(&self, bytes: usize) -> &mut [MaybeUninit<u8>] {
if bytes == 0 {
return &mut [];
}
loop {
if let Some(a) = self.alloc_raw_without_grow(bytes) {
break a;
}
// No free space left. Allocate a new chunk to satisfy the request.
// On failure the grow will panic or abort.
self.grow(bytes);
}
}
/// Copies `string` into the arena and returns the arena-owned copy, which
/// borrows from the arena for `'a`.
pub(crate) fn alloc_str<'a>(&'a self, string: &str) -> &'a mut str {
let alloc = self.alloc_raw(string.len());
let bytes = MaybeUninit::write_slice(alloc, string.as_bytes());
// SAFETY: we convert from `&str` to `&[u8]`, clone it into the arena,
// and immediately convert the clone back to `&str`.
unsafe { str::from_utf8_unchecked_mut(bytes) }
}
}

View file

@ -181,7 +181,6 @@ define_handles! {
Diagnostic, Diagnostic,
'interned: 'interned:
Ident,
Span, Span,
} }
@ -242,6 +241,8 @@ impl fmt::Debug for Span {
} }
} }
pub(crate) use super::symbol::Symbol;
macro_rules! define_client_side { macro_rules! define_client_side {
($($name:ident { ($($name:ident {
$(fn $method:ident($($arg:ident: $arg_ty:ty),* $(,)?) $(-> $ret_ty:ty)*;)* $(fn $method:ident($($arg:ident: $arg_ty:ty),* $(,)?) $(-> $ret_ty:ty)*;)*
@ -405,6 +406,9 @@ fn run_client<A: for<'a, 's> DecodeMut<'a, 's, ()>, R: Encode<()>>(
panic::catch_unwind(panic::AssertUnwindSafe(|| { panic::catch_unwind(panic::AssertUnwindSafe(|| {
maybe_install_panic_hook(force_show_panics); maybe_install_panic_hook(force_show_panics);
// Make sure the symbol store is empty before decoding inputs.
Symbol::invalidate_all();
let reader = &mut &buf[..]; let reader = &mut &buf[..];
let (globals, input) = <(ExpnGlobals<Span>, A)>::decode(reader, &mut ()); let (globals, input) = <(ExpnGlobals<Span>, A)>::decode(reader, &mut ());
@ -438,6 +442,10 @@ fn run_client<A: for<'a, 's> DecodeMut<'a, 's, ()>, R: Encode<()>>(
buf.clear(); buf.clear();
Err::<(), _>(e).encode(&mut buf, &mut ()); Err::<(), _>(e).encode(&mut buf, &mut ());
}); });
// Now that a response has been serialized, invalidate all symbols
// registered with the interner.
Symbol::invalidate_all();
buf buf
} }

View file

@ -65,11 +65,11 @@ macro_rules! with_api {
fn from_str(src: &str) -> $S::TokenStream; fn from_str(src: &str) -> $S::TokenStream;
fn to_string($self: &$S::TokenStream) -> String; fn to_string($self: &$S::TokenStream) -> String;
fn from_token_tree( fn from_token_tree(
tree: TokenTree<$S::TokenStream, $S::Span, $S::Ident, $S::Literal>, tree: TokenTree<$S::TokenStream, $S::Span, $S::Symbol, $S::Literal>,
) -> $S::TokenStream; ) -> $S::TokenStream;
fn concat_trees( fn concat_trees(
base: Option<$S::TokenStream>, base: Option<$S::TokenStream>,
trees: Vec<TokenTree<$S::TokenStream, $S::Span, $S::Ident, $S::Literal>>, trees: Vec<TokenTree<$S::TokenStream, $S::Span, $S::Symbol, $S::Literal>>,
) -> $S::TokenStream; ) -> $S::TokenStream;
fn concat_streams( fn concat_streams(
base: Option<$S::TokenStream>, base: Option<$S::TokenStream>,
@ -77,12 +77,7 @@ macro_rules! with_api {
) -> $S::TokenStream; ) -> $S::TokenStream;
fn into_trees( fn into_trees(
$self: $S::TokenStream $self: $S::TokenStream
) -> Vec<TokenTree<$S::TokenStream, $S::Span, $S::Ident, $S::Literal>>; ) -> Vec<TokenTree<$S::TokenStream, $S::Span, $S::Symbol, $S::Literal>>;
},
Ident {
fn new(string: &str, span: $S::Span, is_raw: bool) -> $S::Ident;
fn span($self: $S::Ident) -> $S::Span;
fn with_span($self: $S::Ident, span: $S::Span) -> $S::Ident;
}, },
Literal { Literal {
fn drop($self: $S::Literal); fn drop($self: $S::Literal);
@ -146,6 +141,9 @@ macro_rules! with_api {
fn save_span($self: $S::Span) -> usize; fn save_span($self: $S::Span) -> usize;
fn recover_proc_macro_span(id: usize) -> $S::Span; fn recover_proc_macro_span(id: usize) -> $S::Span;
}, },
Symbol {
fn normalize_and_validate_ident(string: &str) -> Result<$S::Symbol, ()>;
},
} }
}; };
} }
@ -170,6 +168,8 @@ macro_rules! reverse_decode {
} }
} }
#[allow(unsafe_code)]
mod arena;
#[allow(unsafe_code)] #[allow(unsafe_code)]
mod buffer; mod buffer;
#[forbid(unsafe_code)] #[forbid(unsafe_code)]
@ -189,6 +189,8 @@ mod scoped_cell;
mod selfless_reify; mod selfless_reify;
#[forbid(unsafe_code)] #[forbid(unsafe_code)]
pub mod server; pub mod server;
#[allow(unsafe_code)]
mod symbol;
use buffer::Buffer; use buffer::Buffer;
pub use rpc::PanicMessage; pub use rpc::PanicMessage;
@ -466,16 +468,25 @@ pub struct Punct<Span> {
compound_traits!(struct Punct<Span> { ch, joint, span }); compound_traits!(struct Punct<Span> { ch, joint, span });
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct Ident<Span, Symbol> {
pub sym: Symbol,
pub is_raw: bool,
pub span: Span,
}
compound_traits!(struct Ident<Span, Symbol> { sym, is_raw, span });
#[derive(Clone)] #[derive(Clone)]
pub enum TokenTree<TokenStream, Span, Ident, Literal> { pub enum TokenTree<TokenStream, Span, Symbol, Literal> {
Group(Group<TokenStream, Span>), Group(Group<TokenStream, Span>),
Punct(Punct<Span>), Punct(Punct<Span>),
Ident(Ident), Ident(Ident<Span, Symbol>),
Literal(Literal), Literal(Literal),
} }
compound_traits!( compound_traits!(
enum TokenTree<TokenStream, Span, Ident, Literal> { enum TokenTree<TokenStream, Span, Symbol, Literal> {
Group(tt), Group(tt),
Punct(tt), Punct(tt),
Ident(tt), Ident(tt),

View file

@ -8,12 +8,12 @@ use super::client::HandleStore;
pub trait Types { pub trait Types {
type FreeFunctions: 'static; type FreeFunctions: 'static;
type TokenStream: 'static + Clone; type TokenStream: 'static + Clone;
type Ident: 'static + Copy + Eq + Hash;
type Literal: 'static + Clone; type Literal: 'static + Clone;
type SourceFile: 'static + Clone; type SourceFile: 'static + Clone;
type MultiSpan: 'static; type MultiSpan: 'static;
type Diagnostic: 'static; type Diagnostic: 'static;
type Span: 'static + Copy + Eq + Hash; type Span: 'static + Copy + Eq + Hash;
type Symbol: 'static;
} }
/// Declare an associated fn of one of the traits below, adding necessary /// Declare an associated fn of one of the traits below, adding necessary
@ -38,6 +38,12 @@ macro_rules! declare_server_traits {
pub trait Server: Types $(+ $name)* { pub trait Server: Types $(+ $name)* {
fn globals(&mut self) -> ExpnGlobals<Self::Span>; fn globals(&mut self) -> ExpnGlobals<Self::Span>;
/// Intern a symbol received from RPC
fn intern_symbol(ident: &str) -> Self::Symbol;
/// Recover the string value of a symbol, and invoke a callback with it.
fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str));
} }
} }
} }
@ -49,6 +55,12 @@ impl<S: Server> Server for MarkedTypes<S> {
fn globals(&mut self) -> ExpnGlobals<Self::Span> { fn globals(&mut self) -> ExpnGlobals<Self::Span> {
<_>::mark(Server::globals(&mut self.0)) <_>::mark(Server::globals(&mut self.0))
} }
fn intern_symbol(ident: &str) -> Self::Symbol {
<_>::mark(S::intern_symbol(ident))
}
fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str)) {
S::with_symbol_string(symbol.unmark(), f)
}
} }
macro_rules! define_mark_types_impls { macro_rules! define_mark_types_impls {
@ -81,11 +93,13 @@ macro_rules! define_dispatcher_impl {
pub trait DispatcherTrait { pub trait DispatcherTrait {
// HACK(eddyb) these are here to allow `Self::$name` to work below. // HACK(eddyb) these are here to allow `Self::$name` to work below.
$(type $name;)* $(type $name;)*
fn dispatch(&mut self, buf: Buffer) -> Buffer; fn dispatch(&mut self, buf: Buffer) -> Buffer;
} }
impl<S: Server> DispatcherTrait for Dispatcher<MarkedTypes<S>> { impl<S: Server> DispatcherTrait for Dispatcher<MarkedTypes<S>> {
$(type $name = <MarkedTypes<S> as Types>::$name;)* $(type $name = <MarkedTypes<S> as Types>::$name;)*
fn dispatch(&mut self, mut buf: Buffer) -> Buffer { fn dispatch(&mut self, mut buf: Buffer) -> Buffer {
let Dispatcher { handle_store, server } = self; let Dispatcher { handle_store, server } = self;

View file

@ -0,0 +1,205 @@
//! Client-side interner used for symbols.
//!
//! This is roughly based on the symbol interner from `rustc_span` and the
//! DroplessArena from `rustc_arena`. It is unfortunately a complete
//! copy/re-implementation rather than a dependency as it is difficult to depend
//! on crates from within `proc_macro`, due to it being built at the same time
//! as `std`.
//!
//! If at some point in the future it becomes easier to add dependencies to
//! proc_macro, this module should probably be removed or simplified.
use std::cell::RefCell;
use std::num::NonZeroU32;
use std::str;
use super::*;
/// Handle for a symbol string stored within the Interner.
///
/// The wrapped number is the string's index in the thread-local interner,
/// offset by the interner's current `sym_base`. A handle is therefore only
/// meaningful on the thread that created it and until the interner is cleared.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct Symbol(NonZeroU32);
// The interner is thread-local, so a handle must not cross threads.
impl !Send for Symbol {}
impl !Sync for Symbol {}
impl Symbol {
/// Interns `string` in this thread's interner and returns its handle.
///
/// Interning the same string again yields the same `Symbol`.
pub(crate) fn new(string: &str) -> Self {
    INTERNER.with(|cell| cell.borrow_mut().intern(string))
}
/// Builds the `Symbol` for an identifier, validating `string` first.
///
/// ASCII identifiers are checked locally. Anything containing non-ASCII
/// characters is sent to the server, which validates and normalizes it.
///
/// # Panics
///
/// Panics if `string` is not a valid identifier, or if `is_raw` is set and
/// `string` cannot be written as a raw identifier.
pub(crate) fn new_ident(string: &str, is_raw: bool) -> Self {
    // Common case: a well-formed ASCII identifier needs no RPC round trip.
    if Self::is_valid_ascii_ident(string.as_bytes()) {
        if is_raw && !Self::can_be_raw(string) {
            panic!("`{}` cannot be a raw identifier", string);
        }
        return Self::new(string);
    }

    // An all-ASCII string that failed the check above is simply invalid.
    // Only strings with non-ASCII characters need the server's verdict.
    // Every identifier that cannot be raw is ASCII, so the raw check does
    // not have to be repeated on this path.
    let validated = if string.is_ascii() {
        Err(())
    } else {
        client::Symbol::normalize_and_validate_ident(string)
    };
    match validated {
        Ok(symbol) => symbol,
        Err(_) => panic!("`{:?}` is not a valid identifier", string),
    }
}
/// Calls `f` with the string this symbol stands for and returns its result.
///
/// The interner stays borrowed while `f` runs, so `f` must not intern new
/// symbols. Panics if the symbol was invalidated by `invalidate_all`.
pub(crate) fn with<R>(self, f: impl FnOnce(&str) -> R) -> R {
    INTERNER.with(|cell| {
        let interner = cell.borrow();
        f(interner.get(self))
    })
}
/// Empties this thread's symbol interner.
///
/// Every `Symbol` created before the call becomes invalid: passing one to
/// `with` afterwards panics.
pub(crate) fn invalidate_all() {
    INTERNER.with(|cell| cell.borrow_mut().clear());
}
/// Reports whether `bytes` spell a valid identifier using ASCII only.
///
/// The first byte must be `_` or an ASCII letter; every following byte must
/// be `_`, an ASCII letter or an ASCII digit. Empty input is rejected.
///
/// This is a cheap client-side shortcut that avoids RPC for simple idents.
/// It returns `false` for valid identifiers that contain non-ASCII
/// characters.
fn is_valid_ascii_ident(bytes: &[u8]) -> bool {
    match bytes.split_first() {
        Some((&head, tail)) => {
            (head == b'_' || head.is_ascii_alphabetic())
                && tail.iter().all(|&byte| byte == b'_' || byte.is_ascii_alphanumeric())
        }
        None => false,
    }
}
/// Reports whether `string` may be written as a raw identifier (`r#...`).
///
/// Mimics the behaviour of `Symbol::can_be_raw` from `rustc_span`: `_` and
/// the keywords `super`, `self`, `Self` and `crate` are rejected.
fn can_be_raw(string: &str) -> bool {
    !matches!(string, "_" | "super" | "self" | "Self" | "crate")
}
}
/// Debug-formats the symbol exactly as its string value would be.
impl fmt::Debug for Symbol {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.with(|text| <str as fmt::Debug>::fmt(text, f))
    }
}
/// Dedicated `ToString` impl that copies the interned string directly rather
/// than going through the formatting machinery.
impl ToString for Symbol {
    fn to_string(&self) -> String {
        self.with(|text| String::from(text))
    }
}
/// Displays the symbol exactly as its string value would be.
impl fmt::Display for Symbol {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.with(|text| <str as fmt::Display>::fmt(text, f))
    }
}
/// A symbol is encoded as its full string value, not as its numeric handle.
impl<S> Encode<S> for Symbol {
    fn encode(self, w: &mut Writer, s: &mut S) {
        self.with(|text| {
            text.encode(w, s);
        })
    }
}
/// Server side: reads a string from the reader and interns it through the
/// server's `intern_symbol`.
impl<S: server::Server> DecodeMut<'_, '_, client::HandleStore<server::MarkedTypes<S>>>
    for Marked<S::Symbol, Symbol>
{
    fn decode(r: &mut Reader<'_>, s: &mut client::HandleStore<server::MarkedTypes<S>>) -> Self {
        let text = <&str>::decode(r, s);
        Mark::mark(S::intern_symbol(text))
    }
}
/// Server side: looks up the symbol's string through the server's
/// `with_symbol_string` and writes that string.
impl<S: server::Server> Encode<client::HandleStore<server::MarkedTypes<S>>>
    for Marked<S::Symbol, Symbol>
{
    fn encode(self, w: &mut Writer, s: &mut client::HandleStore<server::MarkedTypes<S>>) {
        let symbol = self.unmark();
        S::with_symbol_string(&symbol, |text| text.encode(w, s))
    }
}
/// Client side: reads a string from the reader and interns it in the
/// thread-local interner.
impl<S> DecodeMut<'_, '_, S> for Symbol {
    fn decode(r: &mut Reader<'_>, s: &mut S) -> Self {
        let text = <&str>::decode(r, s);
        Symbol::new(text)
    }
}
// One interner per thread. `Symbol` is `!Send` and `!Sync`, so a handle is
// only ever looked up in the interner of the thread that created it.
thread_local! {
static INTERNER: RefCell<Interner> = RefCell::new(Interner {
arena: arena::Arena::new(),
names: fxhash::FxHashMap::default(),
strings: Vec::new(),
// Start with a base of 1 to make sure that `NonZeroU32` works.
sym_base: NonZeroU32::new(1).unwrap(),
});
}
/// Basic interner for a `Symbol`, inspired by the one in `rustc_span`.
struct Interner {
// Backing storage for every interned string.
arena: arena::Arena,
// SAFETY: These `'static` lifetimes are actually references to data owned
// by the Arena. This is safe, as we never return them as static references
// from `Interner`.
// Maps an interned string to the symbol handed out for it.
names: fxhash::FxHashMap<&'static str, Symbol>,
// Interned strings in creation order; a symbol's value minus `sym_base` is
// its index in this vector.
strings: Vec<&'static str>,
// The offset to apply to symbol names stored in the interner. This is used
// to ensure that symbol names are not re-used after the interner is
// cleared.
sym_base: NonZeroU32,
}
impl Interner {
/// Returns the symbol for `string`, copying the string into the arena and
/// assigning a new symbol if it has not been interned yet.
///
/// Panics with "`proc_macro` symbol name overflow" if the new symbol value
/// does not fit in a `u32`.
fn intern(&mut self, string: &str) -> Symbol {
if let Some(&name) = self.names.get(string) {
return name;
}
// NOTE(review): `as u32` truncates silently if `strings` ever holds more
// than `u32::MAX` entries; `checked_add` only guards the addition itself.
let name = Symbol(
self.sym_base
.checked_add(self.strings.len() as u32)
.expect("`proc_macro` symbol name overflow"),
);
let string: &str = self.arena.alloc_str(string);
// SAFETY: we can extend the arena allocation to `'static` because we
// only access these while the arena is still alive.
let string: &'static str = unsafe { &*(string as *const str) };
self.strings.push(string);
self.names.insert(string, name);
name
}
/// Read a symbol's value from the store while it is held.
///
/// Panics with "use-after-free of `proc_macro` symbol" when the symbol's
/// value is below the current `sym_base`, i.e. it was created before the
/// last `clear`.
fn get(&self, symbol: Symbol) -> &str {
// NOTE: Subtract out the offset which was added to make the symbol
// nonzero and prevent symbol name re-use.
let name = symbol
.0
.get()
.checked_sub(self.sym_base.get())
.expect("use-after-free of `proc_macro` symbol");
self.strings[name as usize]
}
/// Clear all symbols from the store, invalidating them such that `get` will
/// panic if they are accessed in the future.
fn clear(&mut self) {
// NOTE: Be careful not to panic here, as we may be called on the client
// when a `catch_unwind` isn't installed.
// Advance the base past every symbol handed out so far, so old handles
// fall below it. `saturating_add` is used because it cannot panic.
self.sym_base = self.sym_base.saturating_add(self.strings.len() as u32);
self.names.clear();
self.strings.clear();
// SAFETY: This is cleared after the names and strings tables are
// cleared out, so no references into the arena should remain.
self.arena = arena::Arena::new();
}
}

View file

@ -24,10 +24,14 @@
#![feature(staged_api)] #![feature(staged_api)]
#![feature(allow_internal_unstable)] #![feature(allow_internal_unstable)]
#![feature(decl_macro)] #![feature(decl_macro)]
#![feature(local_key_cell_methods)]
#![feature(maybe_uninit_write_slice)]
#![feature(negative_impls)] #![feature(negative_impls)]
#![feature(new_uninit)]
#![feature(restricted_std)] #![feature(restricted_std)]
#![feature(rustc_attrs)] #![feature(rustc_attrs)]
#![feature(min_specialization)] #![feature(min_specialization)]
#![feature(strict_provenance)]
#![recursion_limit = "256"] #![recursion_limit = "256"]
#[unstable(feature = "proc_macro_internals", issue = "27812")] #[unstable(feature = "proc_macro_internals", issue = "27812")]
@ -214,7 +218,7 @@ fn tree_to_bridge_tree(
) -> bridge::TokenTree< ) -> bridge::TokenTree<
bridge::client::TokenStream, bridge::client::TokenStream,
bridge::client::Span, bridge::client::Span,
bridge::client::Ident, bridge::client::Symbol,
bridge::client::Literal, bridge::client::Literal,
> { > {
match tree { match tree {
@ -240,7 +244,7 @@ struct ConcatTreesHelper {
bridge::TokenTree< bridge::TokenTree<
bridge::client::TokenStream, bridge::client::TokenStream,
bridge::client::Span, bridge::client::Span,
bridge::client::Ident, bridge::client::Symbol,
bridge::client::Literal, bridge::client::Literal,
>, >,
>, >,
@ -367,7 +371,7 @@ pub mod token_stream {
bridge::TokenTree< bridge::TokenTree<
bridge::client::TokenStream, bridge::client::TokenStream,
bridge::client::Span, bridge::client::Span,
bridge::client::Ident, bridge::client::Symbol,
bridge::client::Literal, bridge::client::Literal,
>, >,
>, >,
@ -1048,7 +1052,7 @@ impl PartialEq<Punct> for char {
/// An identifier (`ident`). /// An identifier (`ident`).
#[derive(Clone)] #[derive(Clone)]
#[stable(feature = "proc_macro_lib2", since = "1.29.0")] #[stable(feature = "proc_macro_lib2", since = "1.29.0")]
pub struct Ident(bridge::client::Ident); pub struct Ident(bridge::Ident<bridge::client::Span, bridge::client::Symbol>);
impl Ident { impl Ident {
/// Creates a new `Ident` with the given `string` as well as the specified /// Creates a new `Ident` with the given `string` as well as the specified
@ -1072,7 +1076,11 @@ impl Ident {
/// tokens, requires a `Span` to be specified at construction. /// tokens, requires a `Span` to be specified at construction.
#[stable(feature = "proc_macro_lib2", since = "1.29.0")] #[stable(feature = "proc_macro_lib2", since = "1.29.0")]
pub fn new(string: &str, span: Span) -> Ident { pub fn new(string: &str, span: Span) -> Ident {
Ident(bridge::client::Ident::new(string, span.0, false)) Ident(bridge::Ident {
sym: bridge::client::Symbol::new_ident(string, false),
is_raw: false,
span: span.0,
})
} }
/// Same as `Ident::new`, but creates a raw identifier (`r#ident`). /// Same as `Ident::new`, but creates a raw identifier (`r#ident`).
@ -1081,38 +1089,45 @@ impl Ident {
/// (e.g. `self`, `super`) are not supported, and will cause a panic. /// (e.g. `self`, `super`) are not supported, and will cause a panic.
#[stable(feature = "proc_macro_raw_ident", since = "1.47.0")] #[stable(feature = "proc_macro_raw_ident", since = "1.47.0")]
pub fn new_raw(string: &str, span: Span) -> Ident { pub fn new_raw(string: &str, span: Span) -> Ident {
Ident(bridge::client::Ident::new(string, span.0, true)) Ident(bridge::Ident {
sym: bridge::client::Symbol::new_ident(string, true),
is_raw: true,
span: span.0,
})
} }
/// Returns the span of this `Ident`, encompassing the entire string returned /// Returns the span of this `Ident`, encompassing the entire string returned
/// by [`to_string`](Self::to_string). /// by [`to_string`](ToString::to_string).
#[stable(feature = "proc_macro_lib2", since = "1.29.0")] #[stable(feature = "proc_macro_lib2", since = "1.29.0")]
pub fn span(&self) -> Span { pub fn span(&self) -> Span {
Span(self.0.span()) Span(self.0.span)
} }
/// Configures the span of this `Ident`, possibly changing its hygiene context. /// Configures the span of this `Ident`, possibly changing its hygiene context.
#[stable(feature = "proc_macro_lib2", since = "1.29.0")] #[stable(feature = "proc_macro_lib2", since = "1.29.0")]
pub fn set_span(&mut self, span: Span) { pub fn set_span(&mut self, span: Span) {
self.0 = self.0.with_span(span.0); self.0.span = span.0;
} }
} }
// N.B., the bridge only provides `to_string`, implement `fmt::Display` /// Converts the identifier to a string that should be losslessly convertible
// based on it (the reverse of the usual relationship between the two). /// back into the same identifier.
#[stable(feature = "proc_macro_lib", since = "1.15.0")] #[stable(feature = "proc_macro_lib2", since = "1.29.0")]
impl ToString for Ident { impl ToString for Ident {
fn to_string(&self) -> String { fn to_string(&self) -> String {
TokenStream::from(TokenTree::from(self.clone())).to_string() self.0.sym.with(|sym| if self.0.is_raw { ["r#", sym].concat() } else { sym.to_owned() })
} }
} }
/// Prints the identifier as a string that should be losslessly convertible /// Prints the identifier as a string that should be losslessly convertible back
/// back into the same identifier. /// into the same identifier.
#[stable(feature = "proc_macro_lib2", since = "1.29.0")] #[stable(feature = "proc_macro_lib2", since = "1.29.0")]
impl fmt::Display for Ident { impl fmt::Display for Ident {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(&self.to_string()) if self.0.is_raw {
f.write_str("r#")?;
}
fmt::Display::fmt(&self.0.sym, f)
} }
} }

View file

@ -1,17 +1,9 @@
// aux-build:invalid-punct-ident.rs // aux-build:invalid-punct-ident.rs
// rustc-env:RUST_BACKTRACE=0 // ignore-stage1
// only-linux
// FIXME https://github.com/rust-lang/rust/issues/59998 //
// normalize-stderr-test "thread.*panicked.*proc_macro_server.rs.*\n" -> "" // FIXME: This should be a normal (stage1, all platforms) test in
// normalize-stderr-test "note:.*RUST_BACKTRACE=1.*\n" -> "" // src/test/ui/proc-macro once issue #59998 is fixed.
// normalize-stderr-test "\nerror: internal compiler error.*\n\n" -> ""
// normalize-stderr-test "note:.*unexpectedly panicked.*\n\n" -> ""
// normalize-stderr-test "note: we would appreciate a bug report.*\n\n" -> ""
// normalize-stderr-test "note: compiler flags.*\n\n" -> ""
// normalize-stderr-test "note: rustc.*running on.*\n\n" -> ""
// normalize-stderr-test "query stack during panic:\n" -> ""
// normalize-stderr-test "we're just showing a limited slice of the query stack\n" -> ""
// normalize-stderr-test "end of query stack\n" -> ""
#[macro_use] #[macro_use]
extern crate invalid_punct_ident; extern crate invalid_punct_ident;

View file

@ -1,5 +1,5 @@
error: proc macro panicked error: proc macro panicked
--> $DIR/invalid-punct-ident-2.rs:19:1 --> $DIR/invalid-punct-ident-2.rs:11:1
| |
LL | invalid_ident!(); LL | invalid_ident!();
| ^^^^^^^^^^^^^^^^ | ^^^^^^^^^^^^^^^^

View file

@ -1,17 +1,9 @@
// aux-build:invalid-punct-ident.rs // aux-build:invalid-punct-ident.rs
// rustc-env:RUST_BACKTRACE=0 // ignore-stage1
// only-linux
// FIXME https://github.com/rust-lang/rust/issues/59998 //
// normalize-stderr-test "thread.*panicked.*proc_macro_server.rs.*\n" -> "" // FIXME: This should be a normal (stage1, all platforms) test in
// normalize-stderr-test "note:.*RUST_BACKTRACE=1.*\n" -> "" // src/test/ui/proc-macro once issue #59998 is fixed.
// normalize-stderr-test "\nerror: internal compiler error.*\n\n" -> ""
// normalize-stderr-test "note:.*unexpectedly panicked.*\n\n" -> ""
// normalize-stderr-test "note: we would appreciate a bug report.*\n\n" -> ""
// normalize-stderr-test "note: compiler flags.*\n\n" -> ""
// normalize-stderr-test "note: rustc.*running on.*\n\n" -> ""
// normalize-stderr-test "query stack during panic:\n" -> ""
// normalize-stderr-test "we're just showing a limited slice of the query stack\n" -> ""
// normalize-stderr-test "end of query stack\n" -> ""
#[macro_use] #[macro_use]
extern crate invalid_punct_ident; extern crate invalid_punct_ident;

View file

@ -1,5 +1,5 @@
error: proc macro panicked error: proc macro panicked
--> $DIR/invalid-punct-ident-3.rs:19:1 --> $DIR/invalid-punct-ident-3.rs:11:1
| |
LL | invalid_raw_ident!(); LL | invalid_raw_ident!();
| ^^^^^^^^^^^^^^^^^^^^ | ^^^^^^^^^^^^^^^^^^^^