Auto merge of #98851 - klensy:encode_symbols, r=cjgillot
rustc_metadata: dedupe strings to prevent multiple copies in rmeta/query cache blow file size r? `@cjgillot` Encodes strings in rmeta/query cache so duplicated ones will be encoded as offsets to first strings, reducing file size.
This commit is contained in:
commit
71ecf5d359
5 changed files with 113 additions and 3 deletions
|
@ -22,8 +22,9 @@ use rustc_span::hygiene::{
|
|||
ExpnId, HygieneDecodeContext, HygieneEncodeContext, SyntaxContext, SyntaxContextData,
|
||||
};
|
||||
use rustc_span::source_map::{SourceMap, StableSourceFileId};
|
||||
use rustc_span::CachingSourceMapView;
|
||||
use rustc_span::{BytePos, ExpnData, ExpnHash, Pos, SourceFile, Span};
|
||||
use rustc_span::{CachingSourceMapView, Symbol};
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::io;
|
||||
use std::mem;
|
||||
|
||||
|
@ -38,6 +39,10 @@ const TAG_RELATIVE_SPAN: u8 = 2;
|
|||
const TAG_SYNTAX_CONTEXT: u8 = 0;
|
||||
const TAG_EXPN_DATA: u8 = 1;
|
||||
|
||||
// Tags for encoding Symbol's
|
||||
const SYMBOL_STR: u8 = 0;
|
||||
const SYMBOL_OFFSET: u8 = 1;
|
||||
|
||||
/// Provides an interface to incremental compilation data cached from the
|
||||
/// previous compilation session. This data will eventually include the results
|
||||
/// of a few selected queries (like `typeck` and `mir_optimized`) and
|
||||
|
@ -254,6 +259,7 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
|
|||
source_map: CachingSourceMapView::new(tcx.sess.source_map()),
|
||||
file_to_file_index,
|
||||
hygiene_context: &hygiene_encode_context,
|
||||
symbol_table: Default::default(),
|
||||
};
|
||||
|
||||
// Encode query results.
|
||||
|
@ -714,6 +720,36 @@ impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for Span {
|
|||
}
|
||||
}
|
||||
|
||||
// copy&paste impl from rustc_metadata
|
||||
impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for Symbol {
|
||||
fn decode(d: &mut CacheDecoder<'a, 'tcx>) -> Self {
|
||||
let tag = d.read_u8();
|
||||
|
||||
match tag {
|
||||
SYMBOL_STR => {
|
||||
let s = d.read_str();
|
||||
Symbol::intern(s)
|
||||
}
|
||||
SYMBOL_OFFSET => {
|
||||
// read str offset
|
||||
let pos = d.read_usize();
|
||||
let old_pos = d.opaque.position();
|
||||
|
||||
// move to str ofset and read
|
||||
d.opaque.set_position(pos);
|
||||
let s = d.read_str();
|
||||
let sym = Symbol::intern(s);
|
||||
|
||||
// restore position
|
||||
d.opaque.set_position(old_pos);
|
||||
|
||||
sym
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for CrateNum {
|
||||
fn decode(d: &mut CacheDecoder<'a, 'tcx>) -> Self {
|
||||
let stable_id = StableCrateId::decode(d);
|
||||
|
@ -815,6 +851,7 @@ pub struct CacheEncoder<'a, 'tcx> {
|
|||
source_map: CachingSourceMapView<'tcx>,
|
||||
file_to_file_index: FxHashMap<*const SourceFile, SourceFileIndex>,
|
||||
hygiene_context: &'a HygieneEncodeContext,
|
||||
symbol_table: FxHashMap<Symbol, usize>,
|
||||
}
|
||||
|
||||
impl<'a, 'tcx> CacheEncoder<'a, 'tcx> {
|
||||
|
@ -899,6 +936,25 @@ impl<'a, 'tcx> Encodable<CacheEncoder<'a, 'tcx>> for Span {
|
|||
}
|
||||
}
|
||||
|
||||
// copy&paste impl from rustc_metadata
|
||||
impl<'a, 'tcx> Encodable<CacheEncoder<'a, 'tcx>> for Symbol {
|
||||
fn encode(&self, s: &mut CacheEncoder<'a, 'tcx>) {
|
||||
match s.symbol_table.entry(*self) {
|
||||
Entry::Vacant(o) => {
|
||||
s.encoder.emit_u8(SYMBOL_STR);
|
||||
let pos = s.encoder.position();
|
||||
o.insert(pos);
|
||||
s.emit_str(self.as_str());
|
||||
}
|
||||
Entry::Occupied(o) => {
|
||||
let x = o.get().clone();
|
||||
s.emit_u8(SYMBOL_OFFSET);
|
||||
s.emit_usize(x);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, 'tcx> TyEncoder for CacheEncoder<'a, 'tcx> {
|
||||
type I = TyCtxt<'tcx>;
|
||||
const CLEAR_CROSS_CRATE: bool = false;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue