Auto merge of #100803 - klensy:do-not-encode-preinterned-symbols, r=bjorn3

Symbols: do not write string values of preinterned symbols into compiled artifacts

r? `@bjorn3`

Followup for #98851

https://github.com/rust-lang/rust/pull/98851#issuecomment-1215606291
This commit is contained in:
bors 2022-08-24 16:56:32 +00:00
commit ebfc7aa531
6 changed files with 57 additions and 23 deletions

View file

@ -195,10 +195,10 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
#n, #n,
}); });
} }
let _ = counter; // for future use
let output = quote! { let output = quote! {
const SYMBOL_DIGITS_BASE: u32 = #digits_base; const SYMBOL_DIGITS_BASE: u32 = #digits_base;
const PREINTERNED_SYMBOLS_COUNT: u32 = #counter;
#[doc(hidden)] #[doc(hidden)]
#[allow(non_upper_case_globals)] #[allow(non_upper_case_globals)]

View file

@ -631,6 +631,10 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Symbol {
sym sym
} }
SYMBOL_PREINTERNED => {
let symbol_index = d.read_u32();
Symbol::new_from_decoded(symbol_index)
}
_ => unreachable!(), _ => unreachable!(),
} }
} }

View file

@ -317,17 +317,24 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Symbol { impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Symbol {
fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) { fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
match s.symbol_table.entry(*self) { // if symbol preinterned, emit tag and symbol index
Entry::Vacant(o) => { if self.is_preinterned() {
s.opaque.emit_u8(SYMBOL_STR); s.opaque.emit_u8(SYMBOL_PREINTERNED);
let pos = s.opaque.position(); s.opaque.emit_u32(self.as_u32());
o.insert(pos); } else {
s.emit_str(self.as_str()); // otherwise write it as string or as offset to it
} match s.symbol_table.entry(*self) {
Entry::Occupied(o) => { Entry::Vacant(o) => {
let x = o.get().clone(); s.opaque.emit_u8(SYMBOL_STR);
s.emit_u8(SYMBOL_OFFSET); let pos = s.opaque.position();
s.emit_usize(x); o.insert(pos);
s.emit_str(self.as_str());
}
Entry::Occupied(o) => {
let x = o.get().clone();
s.emit_u8(SYMBOL_OFFSET);
s.emit_usize(x);
}
} }
} }
} }

View file

@ -448,6 +448,7 @@ const TAG_PARTIAL_SPAN: u8 = 2;
// Tags for encoding Symbol's // Tags for encoding Symbol's
const SYMBOL_STR: u8 = 0; const SYMBOL_STR: u8 = 0;
const SYMBOL_OFFSET: u8 = 1; const SYMBOL_OFFSET: u8 = 1;
const SYMBOL_PREINTERNED: u8 = 2;
pub fn provide(providers: &mut Providers) { pub fn provide(providers: &mut Providers) {
encoder::provide(providers); encoder::provide(providers);

View file

@ -42,6 +42,7 @@ const TAG_EXPN_DATA: u8 = 1;
// Tags for encoding Symbol's // Tags for encoding Symbol's
const SYMBOL_STR: u8 = 0; const SYMBOL_STR: u8 = 0;
const SYMBOL_OFFSET: u8 = 1; const SYMBOL_OFFSET: u8 = 1;
const SYMBOL_PREINTERNED: u8 = 2;
/// Provides an interface to incremental compilation data cached from the /// Provides an interface to incremental compilation data cached from the
/// previous compilation session. This data will eventually include the results /// previous compilation session. This data will eventually include the results
@ -745,6 +746,10 @@ impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for Symbol {
sym sym
} }
SYMBOL_PREINTERNED => {
let symbol_index = d.read_u32();
Symbol::new_from_decoded(symbol_index)
}
_ => unreachable!(), _ => unreachable!(),
} }
} }
@ -939,17 +944,24 @@ impl<'a, 'tcx> Encodable<CacheEncoder<'a, 'tcx>> for Span {
// copy&paste impl from rustc_metadata // copy&paste impl from rustc_metadata
impl<'a, 'tcx> Encodable<CacheEncoder<'a, 'tcx>> for Symbol { impl<'a, 'tcx> Encodable<CacheEncoder<'a, 'tcx>> for Symbol {
fn encode(&self, s: &mut CacheEncoder<'a, 'tcx>) { fn encode(&self, s: &mut CacheEncoder<'a, 'tcx>) {
match s.symbol_table.entry(*self) { // if symbol preinterned, emit tag and symbol index
Entry::Vacant(o) => { if self.is_preinterned() {
s.encoder.emit_u8(SYMBOL_STR); s.encoder.emit_u8(SYMBOL_PREINTERNED);
let pos = s.encoder.position(); s.encoder.emit_u32(self.as_u32());
o.insert(pos); } else {
s.emit_str(self.as_str()); // otherwise write it as string or as offset to it
} match s.symbol_table.entry(*self) {
Entry::Occupied(o) => { Entry::Vacant(o) => {
let x = o.get().clone(); s.encoder.emit_u8(SYMBOL_STR);
s.emit_u8(SYMBOL_OFFSET); let pos = s.encoder.position();
s.emit_usize(x); o.insert(pos);
s.emit_str(self.as_str());
}
Entry::Occupied(o) => {
let x = o.get().clone();
s.emit_u8(SYMBOL_OFFSET);
s.emit_usize(x);
}
} }
} }
} }

View file

@ -1804,6 +1804,11 @@ impl Symbol {
Symbol(SymbolIndex::from_u32(n)) Symbol(SymbolIndex::from_u32(n))
} }
/// Builds a `Symbol` from the raw index `n` by forwarding to `Self::new`.
///
/// For use in `Decoder` implementations only.
// NOTE(review): no range check on `n` is visible here; presumably callers pass
// an index that a matching encoder wrote — confirm.
pub fn new_from_decoded(n: u32) -> Self {
Self::new(n)
}
/// Maps a string to its interned representation. /// Maps a string to its interned representation.
pub fn intern(string: &str) -> Self { pub fn intern(string: &str) -> Self {
with_session_globals(|session_globals| session_globals.symbol_interner.intern(string)) with_session_globals(|session_globals| session_globals.symbol_interner.intern(string))
@ -2028,6 +2033,11 @@ impl Symbol {
pub fn can_be_raw(self) -> bool { pub fn can_be_raw(self) -> bool {
self != kw::Empty && self != kw::Underscore && !self.is_path_segment_keyword() self != kw::Empty && self != kw::Underscore && !self.is_path_segment_keyword()
} }
/// Returns `true` if this symbol was interned by the compiler's `symbols!` macro,
/// i.e. if its index is below `PREINTERNED_SYMBOLS_COUNT`.
pub fn is_preinterned(self) -> bool {
self.as_u32() < PREINTERNED_SYMBOLS_COUNT
}
} }
impl Ident { impl Ident {