Auto merge of #139281 - petrochenkov:ctxtdecod6, r=wesleywiser
hygiene: Avoid recursion in syntax context decoding #139241 has two components - Avoiding recursion during syntax context decoding - Encoding/decoding only the non-redundant data, and recalculating the redundant data again during decoding Both of these parts may influence compilation times, possibly in opposite directions. So this PR contains only the first part to evaluate its effect in isolation.
This commit is contained in:
commit
da8321773a
4 changed files with 67 additions and 103 deletions
|
@ -24,9 +24,6 @@
|
|||
// because getting it wrong can lead to nested `HygieneData::with` calls that
|
||||
// trigger runtime aborts. (Fortunately these are obvious and easy to fix.)
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::hash_set::Entry as SetEntry;
|
||||
use std::hash::Hash;
|
||||
use std::sync::Arc;
|
||||
use std::{fmt, iter, mem};
|
||||
|
@ -34,7 +31,7 @@ use std::{fmt, iter, mem};
|
|||
use rustc_data_structures::fingerprint::Fingerprint;
|
||||
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
|
||||
use rustc_data_structures::stable_hasher::{HashStable, HashingControls, StableHasher};
|
||||
use rustc_data_structures::sync::{Lock, WorkerLocal};
|
||||
use rustc_data_structures::sync::Lock;
|
||||
use rustc_data_structures::unhash::UnhashMap;
|
||||
use rustc_hashes::Hash64;
|
||||
use rustc_index::IndexVec;
|
||||
|
@ -61,8 +58,8 @@ impl !PartialOrd for SyntaxContext {}
|
|||
/// The other fields are only for caching.
|
||||
type SyntaxContextKey = (SyntaxContext, ExpnId, Transparency);
|
||||
|
||||
#[derive(Clone, Copy, PartialEq, Debug, Encodable, Decodable)]
|
||||
pub struct SyntaxContextData {
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
struct SyntaxContextData {
|
||||
outer_expn: ExpnId,
|
||||
outer_transparency: Transparency,
|
||||
parent: SyntaxContext,
|
||||
|
@ -74,6 +71,17 @@ pub struct SyntaxContextData {
|
|||
dollar_crate_name: Symbol,
|
||||
}
|
||||
|
||||
/// Same as `SyntaxContextData`, but `opaque(_and_semitransparent)` cannot be recursive
|
||||
/// and use `None` if they need to refer to self. Used for encoding and decoding metadata.
|
||||
#[derive(Encodable, Decodable)]
|
||||
pub struct SyntaxContextDataNonRecursive {
|
||||
outer_expn: ExpnId,
|
||||
outer_transparency: Transparency,
|
||||
parent: SyntaxContext,
|
||||
opaque: Option<SyntaxContext>,
|
||||
opaque_and_semitransparent: Option<SyntaxContext>,
|
||||
}
|
||||
|
||||
impl SyntaxContextData {
|
||||
fn new(
|
||||
(parent, outer_expn, outer_transparency): SyntaxContextKey,
|
||||
|
@ -114,6 +122,19 @@ impl SyntaxContextData {
|
|||
}
|
||||
}
|
||||
|
||||
impl SyntaxContextDataNonRecursive {
|
||||
fn recursive(&self, ctxt: SyntaxContext) -> SyntaxContextData {
|
||||
SyntaxContextData {
|
||||
outer_expn: self.outer_expn,
|
||||
outer_transparency: self.outer_transparency,
|
||||
parent: self.parent,
|
||||
opaque: self.opaque.unwrap_or(ctxt),
|
||||
opaque_and_semitransparent: self.opaque_and_semitransparent.unwrap_or(ctxt),
|
||||
dollar_crate_name: kw::DollarCrate,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rustc_index::newtype_index! {
|
||||
/// A unique ID associated with a macro invocation and expansion.
|
||||
#[orderable]
|
||||
|
@ -637,6 +658,19 @@ impl HygieneData {
|
|||
SyntaxContextData::new(key, opaque, opaque_and_semitransparent);
|
||||
ctxt
|
||||
}
|
||||
|
||||
fn non_recursive_ctxt(&self, ctxt: SyntaxContext) -> SyntaxContextDataNonRecursive {
|
||||
debug_assert!(!self.syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
|
||||
let data = &self.syntax_context_data[ctxt.0 as usize];
|
||||
SyntaxContextDataNonRecursive {
|
||||
outer_expn: data.outer_expn,
|
||||
outer_transparency: data.outer_transparency,
|
||||
parent: data.parent,
|
||||
opaque: (data.opaque != ctxt).then_some(data.opaque),
|
||||
opaque_and_semitransparent: (data.opaque_and_semitransparent != ctxt)
|
||||
.then_some(data.opaque_and_semitransparent),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn walk_chain(span: Span, to: SyntaxContext) -> Span {
|
||||
|
@ -1266,7 +1300,7 @@ impl HygieneEncodeContext {
|
|||
pub fn encode<T>(
|
||||
&self,
|
||||
encoder: &mut T,
|
||||
mut encode_ctxt: impl FnMut(&mut T, u32, &SyntaxContextData),
|
||||
mut encode_ctxt: impl FnMut(&mut T, u32, &SyntaxContextDataNonRecursive),
|
||||
mut encode_expn: impl FnMut(&mut T, ExpnId, &ExpnData, ExpnHash),
|
||||
) {
|
||||
// When we serialize a `SyntaxContextData`, we may end up serializing
|
||||
|
@ -1315,18 +1349,12 @@ struct HygieneDecodeContextInner {
|
|||
// so that multiple occurrences of the same serialized id are decoded to the same
|
||||
// `SyntaxContext`. This only stores `SyntaxContext`s which are completely decoded.
|
||||
remapped_ctxts: Vec<Option<SyntaxContext>>,
|
||||
|
||||
/// Maps serialized `SyntaxContext` ids that are currently being decoded to a `SyntaxContext`.
|
||||
decoding: FxHashMap<u32, SyntaxContext>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
/// Additional information used to assist in decoding hygiene data
|
||||
pub struct HygieneDecodeContext {
|
||||
inner: Lock<HygieneDecodeContextInner>,
|
||||
|
||||
/// A set of serialized `SyntaxContext` ids that are currently being decoded on each thread.
|
||||
local_in_progress: WorkerLocal<RefCell<FxHashSet<u32>>>,
|
||||
}
|
||||
|
||||
/// Register an expansion which has been decoded from the on-disk-cache for the local crate.
|
||||
|
@ -1397,7 +1425,10 @@ pub fn decode_expn_id(
|
|||
// to track which `SyntaxContext`s we have already decoded.
|
||||
// The provided closure will be invoked to deserialize a `SyntaxContextData`
|
||||
// if we haven't already seen the id of the `SyntaxContext` we are deserializing.
|
||||
pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContextData>(
|
||||
pub fn decode_syntax_context<
|
||||
D: Decoder,
|
||||
F: FnOnce(&mut D, u32) -> SyntaxContextDataNonRecursive,
|
||||
>(
|
||||
d: &mut D,
|
||||
context: &HygieneDecodeContext,
|
||||
decode_data: F,
|
||||
|
@ -1409,113 +1440,43 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
|
|||
return SyntaxContext::root();
|
||||
}
|
||||
|
||||
let pending_ctxt = {
|
||||
let mut inner = context.inner.lock();
|
||||
|
||||
// Reminder: `HygieneDecodeContext` is per-crate, so there are no collisions between
|
||||
// raw ids from different crate metadatas.
|
||||
if let Some(ctxt) = inner.remapped_ctxts.get(raw_id as usize).copied().flatten() {
|
||||
// This has already been decoded.
|
||||
return ctxt;
|
||||
}
|
||||
|
||||
match inner.decoding.entry(raw_id) {
|
||||
Entry::Occupied(ctxt_entry) => {
|
||||
let pending_ctxt = *ctxt_entry.get();
|
||||
match context.local_in_progress.borrow_mut().entry(raw_id) {
|
||||
// We're decoding this already on the current thread. Return here and let the
|
||||
// function higher up the stack finish decoding to handle recursive cases.
|
||||
// Hopefully having a `SyntaxContext` that refers to an incorrect data is ok
|
||||
// during reminder of the decoding process, it's certainly not ok after the
|
||||
// top level decoding function returns.
|
||||
SetEntry::Occupied(..) => return pending_ctxt,
|
||||
// Some other thread is currently decoding this.
|
||||
// Race with it (alternatively we could wait here).
|
||||
// We cannot return this value, unlike in the recursive case above, because it
|
||||
// may expose a `SyntaxContext` pointing to incorrect data to arbitrary code.
|
||||
SetEntry::Vacant(entry) => {
|
||||
entry.insert();
|
||||
pending_ctxt
|
||||
}
|
||||
}
|
||||
}
|
||||
Entry::Vacant(entry) => {
|
||||
// We are the first thread to start decoding. Mark the current thread as being
|
||||
// progress.
|
||||
context.local_in_progress.borrow_mut().insert(raw_id);
|
||||
|
||||
// Allocate and store SyntaxContext id *before* calling the decoder function,
|
||||
// as the SyntaxContextData may reference itself.
|
||||
let new_ctxt = HygieneData::with(|hygiene_data| {
|
||||
// Push a dummy SyntaxContextData to ensure that nobody else can get the
|
||||
// same ID as us. This will be overwritten after call `decode_data`.
|
||||
hygiene_data.syntax_context_data.push(SyntaxContextData::decode_placeholder());
|
||||
SyntaxContext::from_usize(hygiene_data.syntax_context_data.len() - 1)
|
||||
});
|
||||
entry.insert(new_ctxt);
|
||||
new_ctxt
|
||||
}
|
||||
}
|
||||
};
|
||||
// Reminder: `HygieneDecodeContext` is per-crate, so there are no collisions between
|
||||
// raw ids from different crate metadatas.
|
||||
if let Some(ctxt) = context.inner.lock().remapped_ctxts.get(raw_id as usize).copied().flatten()
|
||||
{
|
||||
// This has already been decoded.
|
||||
return ctxt;
|
||||
}
|
||||
|
||||
// Don't try to decode data while holding the lock, since we need to
|
||||
// be able to recursively decode a SyntaxContext
|
||||
let ctxt_data = decode_data(d, raw_id);
|
||||
let ctxt_key = ctxt_data.key();
|
||||
|
||||
let ctxt = HygieneData::with(|hygiene_data| {
|
||||
match hygiene_data.syntax_context_map.get(&ctxt_key) {
|
||||
// Ensure that syntax contexts are unique.
|
||||
// If syntax contexts with the given key already exists, reuse it instead of
|
||||
// using `pending_ctxt`.
|
||||
// `pending_ctxt` will leave an unused hole in the vector of syntax contexts.
|
||||
// Hopefully its value isn't stored anywhere during decoding and its dummy data
|
||||
// is never accessed later. The `is_decode_placeholder` asserts on all
|
||||
// accesses to syntax context data attempt to ensure it.
|
||||
Some(&ctxt) => ctxt,
|
||||
// This is a completely new context.
|
||||
// Overwrite its placeholder data with our decoded data.
|
||||
None => {
|
||||
let ctxt_data_ref =
|
||||
&mut hygiene_data.syntax_context_data[pending_ctxt.as_u32() as usize];
|
||||
let prev_ctxt_data = mem::replace(ctxt_data_ref, ctxt_data);
|
||||
// Reset `dollar_crate_name` so that it will be updated by `update_dollar_crate_names`.
|
||||
// We don't care what the encoding crate set this to - we want to resolve it
|
||||
// from the perspective of the current compilation session.
|
||||
ctxt_data_ref.dollar_crate_name = kw::DollarCrate;
|
||||
// Make sure nothing weird happened while `decode_data` was running.
|
||||
if !prev_ctxt_data.is_decode_placeholder() {
|
||||
// Another thread may have already inserted the decoded data,
|
||||
// but the decoded data should match.
|
||||
assert_eq!(prev_ctxt_data, *ctxt_data_ref);
|
||||
}
|
||||
hygiene_data.syntax_context_map.insert(ctxt_key, pending_ctxt);
|
||||
pending_ctxt
|
||||
}
|
||||
}
|
||||
let ctxt_key = (ctxt_data.parent, ctxt_data.outer_expn, ctxt_data.outer_transparency);
|
||||
*hygiene_data.syntax_context_map.entry(ctxt_key).or_insert_with(|| {
|
||||
let ctxt = SyntaxContext::from_usize(hygiene_data.syntax_context_data.len());
|
||||
hygiene_data.syntax_context_data.push(ctxt_data.recursive(ctxt));
|
||||
ctxt
|
||||
})
|
||||
});
|
||||
|
||||
// Mark the context as completed
|
||||
context.local_in_progress.borrow_mut().remove(&raw_id);
|
||||
|
||||
let mut inner = context.inner.lock();
|
||||
let new_len = raw_id as usize + 1;
|
||||
if inner.remapped_ctxts.len() < new_len {
|
||||
inner.remapped_ctxts.resize(new_len, None);
|
||||
}
|
||||
inner.remapped_ctxts[raw_id as usize] = Some(ctxt);
|
||||
inner.decoding.remove(&raw_id);
|
||||
|
||||
ctxt
|
||||
}
|
||||
|
||||
fn for_all_ctxts_in<F: FnMut(u32, SyntaxContext, &SyntaxContextData)>(
|
||||
fn for_all_ctxts_in<F: FnMut(u32, SyntaxContext, &SyntaxContextDataNonRecursive)>(
|
||||
ctxts: impl Iterator<Item = SyntaxContext>,
|
||||
mut f: F,
|
||||
) {
|
||||
let all_data: Vec<_> = HygieneData::with(|data| {
|
||||
ctxts.map(|ctxt| (ctxt, data.syntax_context_data[ctxt.0 as usize].clone())).collect()
|
||||
});
|
||||
let all_data: Vec<_> =
|
||||
HygieneData::with(|data| ctxts.map(|ctxt| (ctxt, data.non_recursive_ctxt(ctxt))).collect());
|
||||
for (ctxt, data) in all_data.into_iter() {
|
||||
f(ctxt.0, ctxt, &data);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue