Skip to content

Fix races conditions with SyntaxContext decoding #115082

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions compiler/rustc_data_structures/src/sync/worker_local.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,3 +171,9 @@ impl<T> Deref for WorkerLocal<T> {
unsafe { &self.locals.get_unchecked(self.registry.id().verify()).0 }
}
}

impl<T: Default> Default for WorkerLocal<T> {
fn default() -> Self {
WorkerLocal::new(|_| T::default())
}
}
120 changes: 85 additions & 35 deletions compiler/rustc_span/src/hygiene.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,13 @@ use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_data_structures::stable_hasher::HashingControls;
use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher};
use rustc_data_structures::sync::{Lock, Lrc};
use rustc_data_structures::sync::{Lock, Lrc, WorkerLocal};
use rustc_data_structures::unhash::UnhashMap;
use rustc_index::IndexVec;
use rustc_macros::HashStable_Generic;
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
use std::cell::RefCell;
use std::collections::hash_map::Entry;
use std::fmt;
use std::hash::Hash;

Expand Down Expand Up @@ -1241,13 +1243,25 @@ impl HygieneEncodeContext {

#[derive(Default)]
/// Additional information used to assist in decoding hygiene data
pub struct HygieneDecodeContext {
struct HygieneDecodeContextInner {
// Maps serialized `SyntaxContext` ids to a `SyntaxContext` in the current
// global `HygieneData`. When we deserialize a `SyntaxContext`, we need to create
// a new id in the global `HygieneData`. This map tracks the ID we end up picking,
// so that multiple occurrences of the same serialized id are decoded to the same
// `SyntaxContext`
remapped_ctxts: Lock<Vec<Option<SyntaxContext>>>,
// `SyntaxContext`. This only stores `SyntaxContext`s which are completly decoded.
remapped_ctxts: Vec<Option<SyntaxContext>>,

/// Maps serialized `SyntaxContext` ids that are currently being decoded to a `SyntaxContext`.
decoding: FxHashMap<u32, SyntaxContext>,
}

#[derive(Default)]
/// Additional information used to assist in decoding hygiene data
pub struct HygieneDecodeContext {
inner: Lock<HygieneDecodeContextInner>,

/// A set of serialized `SyntaxContext` ids that are currently being decoded on each thread.
local_in_progress: WorkerLocal<RefCell<FxHashMap<u32, ()>>>,
}

/// Register an expansion which has been decoded from the on-disk-cache for the local crate.
Expand Down Expand Up @@ -1331,38 +1345,56 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
return SyntaxContext::root();
}

let outer_ctxts = &context.remapped_ctxts;
let ctxt = {
let mut inner = context.inner.lock();

// Ensure that the lock() temporary is dropped early
{
if let Some(ctxt) = outer_ctxts.lock().get(raw_id as usize).copied().flatten() {
if let Some(ctxt) = inner.remapped_ctxts.get(raw_id as usize).copied().flatten() {
// This has already beeen decoded.
return ctxt;
}
}

// Allocate and store SyntaxContext id *before* calling the decoder function,
// as the SyntaxContextData may reference itself.
let new_ctxt = HygieneData::with(|hygiene_data| {
let new_ctxt = SyntaxContext(hygiene_data.syntax_context_data.len() as u32);
// Push a dummy SyntaxContextData to ensure that nobody else can get the
// same ID as us. This will be overwritten after call `decode_Data`
hygiene_data.syntax_context_data.push(SyntaxContextData {
outer_expn: ExpnId::root(),
outer_transparency: Transparency::Transparent,
parent: SyntaxContext::root(),
opaque: SyntaxContext::root(),
opaque_and_semitransparent: SyntaxContext::root(),
dollar_crate_name: kw::Empty,
});
let mut ctxts = outer_ctxts.lock();
let new_len = raw_id as usize + 1;
if ctxts.len() < new_len {
ctxts.resize(new_len, None);
match inner.decoding.entry(raw_id) {
Entry::Occupied(ctxt_entry) => {
match context.local_in_progress.borrow_mut().entry(raw_id) {
Entry::Occupied(..) => {
// We're decoding this already on the current thread. Return here
// and let the function higher up the stack finish decoding to handle
// recursive cases.
return *ctxt_entry.get();
}
Entry::Vacant(entry) => {
entry.insert(());

// Some other thread is current decoding this. Race with it.
*ctxt_entry.get()
}
}
}
Entry::Vacant(entry) => {
// We are the first thread to start decoding. Mark the current thread as being progress.
context.local_in_progress.borrow_mut().insert(raw_id, ());

// Allocate and store SyntaxContext id *before* calling the decoder function,
// as the SyntaxContextData may reference itself.
let new_ctxt = HygieneData::with(|hygiene_data| {
let new_ctxt = SyntaxContext(hygiene_data.syntax_context_data.len() as u32);
// Push a dummy SyntaxContextData to ensure that nobody else can get the
// same ID as us. This will be overwritten after call `decode_Data`
hygiene_data.syntax_context_data.push(SyntaxContextData {
outer_expn: ExpnId::root(),
outer_transparency: Transparency::Transparent,
parent: SyntaxContext::root(),
opaque: SyntaxContext::root(),
opaque_and_semitransparent: SyntaxContext::root(),
dollar_crate_name: kw::Empty,
});
new_ctxt
});
entry.insert(new_ctxt);
new_ctxt
}
}
ctxts[raw_id as usize] = Some(new_ctxt);
drop(ctxts);
new_ctxt
});
};

// Don't try to decode data while holding the lock, since we need to
// be able to recursively decode a SyntaxContext
Expand All @@ -1375,14 +1407,32 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
// Overwrite the dummy data with our decoded SyntaxContextData
HygieneData::with(|hygiene_data| {
let dummy = std::mem::replace(
&mut hygiene_data.syntax_context_data[new_ctxt.as_u32() as usize],
&mut hygiene_data.syntax_context_data[ctxt.as_u32() as usize],
ctxt_data,
);
// Make sure nothing weird happening while `decode_data` was running
assert_eq!(dummy.dollar_crate_name, kw::Empty);
if cfg!(not(parallel_compiler)) {
// Make sure nothing weird happened while `decode_data` was running.
// We used `kw::Empty` for the dummy value and we expect nothing to be
// modifying the dummy entry.
// This does not hold for the parallel compiler as another thread may
// have inserted the fully decoded data.
assert_eq!(dummy.dollar_crate_name, kw::Empty);
}
});

new_ctxt
// Mark the context as completed

context.local_in_progress.borrow_mut().remove(&raw_id);

let mut inner = context.inner.lock();
let new_len = raw_id as usize + 1;
if inner.remapped_ctxts.len() < new_len {
inner.remapped_ctxts.resize(new_len, None);
}
inner.remapped_ctxts[raw_id as usize] = Some(ctxt);
inner.decoding.remove(&raw_id);

ctxt
}

fn for_all_ctxts_in<F: FnMut(u32, SyntaxContext, &SyntaxContextData)>(
Expand Down