1
Fork 0

Rollup merge of #138682 - Alexendoo:extra-symbols, r=fee1-dead

Allow drivers to supply a list of extra symbols to intern

Allows adding new symbols as `const`s in external drivers. This is desirable in Clippy so that we can use them in patterns, replacing code like that at `src/tools/clippy/clippy_lints/src/casts/cast_ptr_alignment.rs` line 66 (as of commit 75530e9f72).

The Clippy change adds a couple of symbols as a demo; the exact `clippy_utils` API and the replacement of other usages can be done on the Clippy side to minimise sync conflicts.

---

try-job: aarch64-gnu
This commit is contained in:
Stuart Cook 2025-04-11 13:31:44 +10:00 committed by GitHub
commit 0abc6c6e98
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 115 additions and 58 deletions

View file

@ -264,6 +264,7 @@ pub fn run_compiler(at_args: &[String], callbacks: &mut (dyn Callbacks + Send))
hash_untracked_state: None,
register_lints: None,
override_queries: None,
extra_symbols: Vec::new(),
make_codegen_backend: None,
registry: diagnostics_registry(),
using_internal_features: &USING_INTERNAL_FEATURES,

View file

@ -17,7 +17,7 @@ fn def_path_hash_depends_on_crate_id() {
// the crate by changing the crate disambiguator (e.g. via bumping the
// crate's version number).
create_session_globals_then(Edition::Edition2024, None, || {
create_session_globals_then(Edition::Edition2024, &[], None, || {
let id0 = StableCrateId::new(Symbol::intern("foo"), false, vec!["1".to_string()], "");
let id1 = StableCrateId::new(Symbol::intern("foo"), false, vec!["2".to_string()], "");

View file

@ -348,6 +348,10 @@ pub struct Config {
/// the list of queries.
pub override_queries: Option<fn(&Session, &mut Providers)>,
/// An extra set of symbols to add to the symbol interner. The symbol indices
/// will start at [`PREDEFINED_SYMBOLS_COUNT`](rustc_span::symbol::PREDEFINED_SYMBOLS_COUNT).
pub extra_symbols: Vec<&'static str>,
/// This is a callback from the driver that is called to create a codegen backend.
///
/// Has no uses within this repository, but is used by bjorn3 for "the
@ -409,6 +413,7 @@ pub fn run_compiler<R: Send>(config: Config, f: impl FnOnce(&Compiler) -> R + Se
&early_dcx,
config.opts.edition,
config.opts.unstable_opts.threads,
&config.extra_symbols,
SourceMapInputs { file_loader, path_mapping, hash_kind, checksum_hash_kind },
|current_gcx| {
// The previous `early_dcx` can't be reused here because it doesn't

View file

@ -53,7 +53,7 @@ where
checksum_hash_kind,
});
rustc_span::create_session_globals_then(DEFAULT_EDITION, sm_inputs, || {
rustc_span::create_session_globals_then(DEFAULT_EDITION, &[], sm_inputs, || {
let temps_dir = sessopts.unstable_opts.temps_dir.as_deref().map(PathBuf::from);
let io = CompilerIO {
input: Input::Str { name: FileName::Custom(String::new()), input: String::new() },

View file

@ -117,6 +117,7 @@ fn run_in_thread_with_globals<F: FnOnce(CurrentGcx) -> R + Send, R: Send>(
thread_stack_size: usize,
edition: Edition,
sm_inputs: SourceMapInputs,
extra_symbols: &[&'static str],
f: F,
) -> R {
// The "thread pool" is a single spawned thread in the non-parallel
@ -134,9 +135,12 @@ fn run_in_thread_with_globals<F: FnOnce(CurrentGcx) -> R + Send, R: Send>(
// name contains null bytes.
let r = builder
.spawn_scoped(s, move || {
rustc_span::create_session_globals_then(edition, Some(sm_inputs), || {
f(CurrentGcx::new())
})
rustc_span::create_session_globals_then(
edition,
extra_symbols,
Some(sm_inputs),
|| f(CurrentGcx::new()),
)
})
.unwrap()
.join();
@ -152,6 +156,7 @@ pub(crate) fn run_in_thread_pool_with_globals<F: FnOnce(CurrentGcx) -> R + Send,
thread_builder_diag: &EarlyDiagCtxt,
edition: Edition,
threads: usize,
extra_symbols: &[&'static str],
sm_inputs: SourceMapInputs,
f: F,
) -> R {
@ -168,12 +173,18 @@ pub(crate) fn run_in_thread_pool_with_globals<F: FnOnce(CurrentGcx) -> R + Send,
let registry = sync::Registry::new(std::num::NonZero::new(threads).unwrap());
if !sync::is_dyn_thread_safe() {
return run_in_thread_with_globals(thread_stack_size, edition, sm_inputs, |current_gcx| {
// Register the thread for use with the `WorkerLocal` type.
registry.register();
return run_in_thread_with_globals(
thread_stack_size,
edition,
sm_inputs,
extra_symbols,
|current_gcx| {
// Register the thread for use with the `WorkerLocal` type.
registry.register();
f(current_gcx)
});
f(current_gcx)
},
);
}
let current_gcx = FromDyn::from(CurrentGcx::new());
@ -230,7 +241,7 @@ pub(crate) fn run_in_thread_pool_with_globals<F: FnOnce(CurrentGcx) -> R + Send,
// pool. Upon creation, each worker thread created gets a copy of the
// session globals in TLS. This is possible because `SessionGlobals` impls
// `Send` in the parallel compiler.
rustc_span::create_session_globals_then(edition, Some(sm_inputs), || {
rustc_span::create_session_globals_then(edition, extra_symbols, Some(sm_inputs), || {
rustc_span::with_session_globals(|session_globals| {
let session_globals = FromDyn::from(session_globals);
builder

View file

@ -142,13 +142,13 @@ pub(super) fn symbols(input: TokenStream) -> TokenStream {
output
}
struct Preinterned {
struct Predefined {
idx: u32,
span_of_name: Span,
}
struct Entries {
map: HashMap<String, Preinterned>,
map: HashMap<String, Predefined>,
}
impl Entries {
@ -163,7 +163,7 @@ impl Entries {
prev.idx
} else {
let idx = self.len();
self.map.insert(s.to_string(), Preinterned { idx, span_of_name: span });
self.map.insert(s.to_string(), Predefined { idx, span_of_name: span });
idx
}
}
@ -295,10 +295,14 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
}
let symbol_digits_base = entries.map["0"].idx;
let preinterned_symbols_count = entries.len();
let predefined_symbols_count = entries.len();
let output = quote! {
const SYMBOL_DIGITS_BASE: u32 = #symbol_digits_base;
const PREINTERNED_SYMBOLS_COUNT: u32 = #preinterned_symbols_count;
/// The number of predefined symbols; this is the first index for
/// extra pre-interned symbols in an Interner created via
/// [`Interner::with_extra_symbols`].
pub const PREDEFINED_SYMBOLS_COUNT: u32 = #predefined_symbols_count;
#[doc(hidden)]
#[allow(non_upper_case_globals)]
@ -315,10 +319,13 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
}
impl Interner {
pub(crate) fn fresh() -> Self {
Interner::prefill(&[
#prefill_stream
])
/// Creates an `Interner` with the predefined symbols from the `symbols!` macro and
/// any extra symbols provided by external drivers such as Clippy.
pub(crate) fn with_extra_symbols(extra_symbols: &[&'static str]) -> Self {
Interner::prefill(
&[#prefill_stream],
extra_symbols,
)
}
}
};

View file

@ -562,9 +562,9 @@ impl<'a, 'tcx> SpanDecoder for DecodeContext<'a, 'tcx> {
Symbol::intern(s)
})
}
SYMBOL_PREINTERNED => {
SYMBOL_PREDEFINED => {
let symbol_index = self.read_u32();
Symbol::new_from_decoded(symbol_index)
Symbol::new(symbol_index)
}
_ => unreachable!(),
}

View file

@ -201,9 +201,9 @@ impl<'a, 'tcx> SpanEncoder for EncodeContext<'a, 'tcx> {
}
fn encode_symbol(&mut self, symbol: Symbol) {
// if symbol preinterned, emit tag and symbol index
if symbol.is_preinterned() {
self.opaque.emit_u8(SYMBOL_PREINTERNED);
// if symbol predefined, emit tag and symbol index
if symbol.is_predefined() {
self.opaque.emit_u8(SYMBOL_PREDEFINED);
self.opaque.emit_u32(symbol.as_u32());
} else {
// otherwise write it as string or as offset to it

View file

@ -586,7 +586,7 @@ impl SpanTag {
// Tags for encoding Symbol's
const SYMBOL_STR: u8 = 0;
const SYMBOL_OFFSET: u8 = 1;
const SYMBOL_PREINTERNED: u8 = 2;
const SYMBOL_PREDEFINED: u8 = 2;
pub fn provide(providers: &mut Providers) {
encoder::provide(providers);

View file

@ -46,7 +46,7 @@ const TAG_EXPN_DATA: u8 = 1;
// Tags for encoding Symbol's
const SYMBOL_STR: u8 = 0;
const SYMBOL_OFFSET: u8 = 1;
const SYMBOL_PREINTERNED: u8 = 2;
const SYMBOL_PREDEFINED: u8 = 2;
/// Provides an interface to incremental compilation data cached from the
/// previous compilation session. This data will eventually include the results
@ -674,9 +674,9 @@ impl<'a, 'tcx> SpanDecoder for CacheDecoder<'a, 'tcx> {
Symbol::intern(s)
})
}
SYMBOL_PREINTERNED => {
SYMBOL_PREDEFINED => {
let symbol_index = self.read_u32();
Symbol::new_from_decoded(symbol_index)
Symbol::new(symbol_index)
}
_ => unreachable!(),
}
@ -892,9 +892,9 @@ impl<'a, 'tcx> SpanEncoder for CacheEncoder<'a, 'tcx> {
// copy&paste impl from rustc_metadata
fn encode_symbol(&mut self, symbol: Symbol) {
// if symbol preinterned, emit tag and symbol index
if symbol.is_preinterned() {
self.encoder.emit_u8(SYMBOL_PREINTERNED);
// if symbol predefined, emit tag and symbol index
if symbol.is_predefined() {
self.encoder.emit_u8(SYMBOL_PREDEFINED);
self.encoder.emit_u32(symbol.as_u32());
} else {
// otherwise write it as string or as offset to it

View file

@ -116,9 +116,13 @@ pub struct SessionGlobals {
}
impl SessionGlobals {
pub fn new(edition: Edition, sm_inputs: Option<SourceMapInputs>) -> SessionGlobals {
pub fn new(
edition: Edition,
extra_symbols: &[&'static str],
sm_inputs: Option<SourceMapInputs>,
) -> SessionGlobals {
SessionGlobals {
symbol_interner: symbol::Interner::fresh(),
symbol_interner: symbol::Interner::with_extra_symbols(extra_symbols),
span_interner: Lock::new(span_encoding::SpanInterner::default()),
metavar_spans: Default::default(),
hygiene_data: Lock::new(hygiene::HygieneData::new(edition)),
@ -129,6 +133,7 @@ impl SessionGlobals {
pub fn create_session_globals_then<R>(
edition: Edition,
extra_symbols: &[&'static str],
sm_inputs: Option<SourceMapInputs>,
f: impl FnOnce() -> R,
) -> R {
@ -137,7 +142,7 @@ pub fn create_session_globals_then<R>(
"SESSION_GLOBALS should never be overwritten! \
Use another thread if you need another SessionGlobals"
);
let session_globals = SessionGlobals::new(edition, sm_inputs);
let session_globals = SessionGlobals::new(edition, extra_symbols, sm_inputs);
SESSION_GLOBALS.set(&session_globals, f)
}
@ -156,7 +161,7 @@ where
F: FnOnce(&SessionGlobals) -> R,
{
if !SESSION_GLOBALS.is_set() {
let session_globals = SessionGlobals::new(edition, None);
let session_globals = SessionGlobals::new(edition, &[], None);
SESSION_GLOBALS.set(&session_globals, || SESSION_GLOBALS.with(f))
} else {
SESSION_GLOBALS.with(f)
@ -172,7 +177,7 @@ where
/// Default edition, no source map.
pub fn create_default_session_globals_then<R>(f: impl FnOnce() -> R) -> R {
create_session_globals_then(edition::DEFAULT_EDITION, None, f)
create_session_globals_then(edition::DEFAULT_EDITION, &[], None, f)
}
// If this ever becomes non thread-local, `decode_syntax_context`

View file

@ -2540,15 +2540,10 @@ rustc_index::newtype_index! {
}
impl Symbol {
const fn new(n: u32) -> Self {
pub const fn new(n: u32) -> Self {
Symbol(SymbolIndex::from_u32(n))
}
/// for use in Decoder only
pub fn new_from_decoded(n: u32) -> Self {
Self::new(n)
}
/// Maps a string to its interned representation.
#[rustc_diagnostic_item = "SymbolIntern"]
pub fn intern(string: &str) -> Self {
@ -2634,11 +2629,14 @@ struct InternerInner {
}
impl Interner {
fn prefill(init: &[&'static str]) -> Self {
Interner(Lock::new(InternerInner {
arena: Default::default(),
strings: init.iter().copied().collect(),
}))
fn prefill(init: &[&'static str], extra: &[&'static str]) -> Self {
let strings = FxIndexSet::from_iter(init.iter().copied().chain(extra.iter().copied()));
assert_eq!(
strings.len(),
init.len() + extra.len(),
"`init` or `extra` contain duplicate symbols",
);
Interner(Lock::new(InternerInner { arena: Default::default(), strings }))
}
#[inline]
@ -2762,9 +2760,9 @@ impl Symbol {
self != kw::Empty && self != kw::Underscore && !self.is_path_segment_keyword()
}
/// Is this symbol was interned in compiler's `symbols!` macro
pub fn is_preinterned(self) -> bool {
self.as_u32() < PREINTERNED_SYMBOLS_COUNT
/// Was this symbol predefined in the compiler's `symbols!` macro
pub fn is_predefined(self) -> bool {
self.as_u32() < PREDEFINED_SYMBOLS_COUNT
}
}

View file

@ -3,7 +3,7 @@ use crate::create_default_session_globals_then;
#[test]
fn interner_tests() {
let i = Interner::prefill(&[]);
let i = Interner::prefill(&[], &[]);
// first one is zero:
assert_eq!(i.intern("dog"), Symbol::new(0));
// re-use gets the same entry: