Skip to content

Commit 98faffd

Browse files
committed
Auto merge of rust-lang#139228 - petrochenkov:ctxtdecod5, r=<try>
hygiene: Remove all caching in syntax context decoding rust-lang#129827 unintentionally removed one caching layer in syntax context decoding (rust-lang#129827 (comment)), and it was a perf regression. However, it didn't remove all the infrastructure and locks supporting that caching layer. Let's actually try to double down on that change, remove everything and see what happens. If it doesn't work out, we'll try just try to re-land rust-lang#129827 without the `remapped_ctxts` removal. cc `@bvanjoi`
2 parents e2014e8 + fdc1439 commit 98faffd

File tree

5 files changed

+24
-164
lines changed

5 files changed

+24
-164
lines changed

compiler/rustc_metadata/src/rmeta/decoder.rs

+1-10
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ use rustc_serialize::{Decodable, Decoder};
3131
use rustc_session::Session;
3232
use rustc_session::config::TargetModifier;
3333
use rustc_session::cstore::{CrateSource, ExternCrate};
34-
use rustc_span::hygiene::HygieneDecodeContext;
3534
use rustc_span::{BytePos, DUMMY_SP, Pos, SpanData, SpanDecoder, SyntaxContext, kw};
3635
use tracing::debug;
3736

@@ -127,13 +126,6 @@ pub(crate) struct CrateMetadata {
127126
/// The crate was used non-speculatively.
128127
used: bool,
129128

130-
/// Additional data used for decoding `HygieneData` (e.g. `SyntaxContext`
131-
/// and `ExpnId`).
132-
/// Note that we store a `HygieneDecodeContext` for each `CrateMetadata`. This is
133-
/// because `SyntaxContext` ids are not globally unique, so we need
134-
/// to track which ids we've decoded on a per-crate basis.
135-
hygiene_context: HygieneDecodeContext,
136-
137129
// --- Data used only for improving diagnostics ---
138130
/// Information about the `extern crate` item or path that caused this crate to be loaded.
139131
/// If this is `None`, then the crate was injected (e.g., by the allocator).
@@ -470,7 +462,7 @@ impl<'a, 'tcx> SpanDecoder for DecodeContext<'a, 'tcx> {
470462
};
471463

472464
let cname = cdata.root.name();
473-
rustc_span::hygiene::decode_syntax_context(self, &cdata.hygiene_context, |_, id| {
465+
rustc_span::hygiene::decode_syntax_context(self, |_, id| {
474466
debug!("SpecializedDecoder<SyntaxContext>: decoding {}", id);
475467
cdata
476468
.root
@@ -1864,7 +1856,6 @@ impl CrateMetadata {
18641856
host_hash,
18651857
used: false,
18661858
extern_crate: None,
1867-
hygiene_context: Default::default(),
18681859
def_key_cache: Default::default(),
18691860
};
18701861

compiler/rustc_metadata/src/rmeta/mod.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ use rustc_serialize::opaque::FileEncoder;
3636
use rustc_session::config::{SymbolManglingVersion, TargetModifier};
3737
use rustc_session::cstore::{CrateDepKind, ForeignModule, LinkagePreference, NativeLib};
3838
use rustc_span::edition::Edition;
39-
use rustc_span::hygiene::{ExpnIndex, MacroKind, SyntaxContextData};
39+
use rustc_span::hygiene::{ExpnIndex, MacroKind, SyntaxContextKey};
4040
use rustc_span::{self, ExpnData, ExpnHash, ExpnId, Ident, Span, Symbol};
4141
use rustc_target::spec::{PanicStrategy, TargetTuple};
4242
use table::TableBuilder;
@@ -193,7 +193,7 @@ enum LazyState {
193193
Previous(NonZero<usize>),
194194
}
195195

196-
type SyntaxContextTable = LazyTable<u32, Option<LazyValue<SyntaxContextData>>>;
196+
type SyntaxContextTable = LazyTable<u32, Option<LazyValue<SyntaxContextKey>>>;
197197
type ExpnDataTable = LazyTable<ExpnIndex, Option<LazyValue<ExpnData>>>;
198198
type ExpnHashTable = LazyTable<ExpnIndex, Option<LazyValue<ExpnHash>>>;
199199

compiler/rustc_middle/src/query/on_disk_cache.rs

+6-14
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@ use rustc_query_system::query::QuerySideEffect;
1515
use rustc_serialize::opaque::{FileEncodeResult, FileEncoder, IntEncodedWithFixedSize, MemDecoder};
1616
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
1717
use rustc_session::Session;
18-
use rustc_span::hygiene::{
19-
ExpnId, HygieneDecodeContext, HygieneEncodeContext, SyntaxContext, SyntaxContextData,
20-
};
18+
use rustc_span::hygiene::{ExpnId, HygieneEncodeContext, SyntaxContext, SyntaxContextKey};
2119
use rustc_span::source_map::Spanned;
2220
use rustc_span::{
2321
BytePos, CachingSourceMapView, ExpnData, ExpnHash, Pos, RelativeBytePos, SourceFile, Span,
@@ -75,9 +73,9 @@ pub struct OnDiskCache {
7573
alloc_decoding_state: AllocDecodingState,
7674

7775
// A map from syntax context ids to the position of their associated
78-
// `SyntaxContextData`. We use a `u32` instead of a `SyntaxContext`
76+
// `SyntaxContextKey`. We use a `u32` instead of a `SyntaxContext`
7977
// to represent the fact that we are storing *encoded* ids. When we decode
80-
// a `SyntaxContext`, a new id will be allocated from the global `HygieneData`,
78+
// a `SyntaxContextKey`, a new id will be allocated from the global `HygieneData`,
8179
// which will almost certainly be different than the serialized id.
8280
syntax_contexts: FxHashMap<u32, AbsoluteBytePos>,
8381
// A map from the `DefPathHash` of an `ExpnId` to the position
@@ -90,8 +88,6 @@ pub struct OnDiskCache {
9088
// we could look up the `ExpnData` from the metadata of foreign crates,
9189
// but it seemed easier to have `OnDiskCache` be independent of the `CStore`.
9290
expn_data: UnhashMap<ExpnHash, AbsoluteBytePos>,
93-
// Additional information used when decoding hygiene data.
94-
hygiene_context: HygieneDecodeContext,
9591
// Maps `ExpnHash`es to their raw value from the *previous*
9692
// compilation session. This is used as an initial 'guess' when
9793
// we try to map an `ExpnHash` to its value in the current
@@ -183,7 +179,6 @@ impl OnDiskCache {
183179
syntax_contexts: footer.syntax_contexts,
184180
expn_data: footer.expn_data,
185181
foreign_expn_data: footer.foreign_expn_data,
186-
hygiene_context: Default::default(),
187182
})
188183
}
189184

@@ -199,7 +194,6 @@ impl OnDiskCache {
199194
syntax_contexts: FxHashMap::default(),
200195
expn_data: UnhashMap::default(),
201196
foreign_expn_data: UnhashMap::default(),
202-
hygiene_context: Default::default(),
203197
}
204198
}
205199

@@ -305,7 +299,7 @@ impl OnDiskCache {
305299
let mut expn_data = UnhashMap::default();
306300
let mut foreign_expn_data = UnhashMap::default();
307301

308-
// Encode all hygiene data (`SyntaxContextData` and `ExpnData`) from the current
302+
// Encode all hygiene data (`SyntaxContextKey` and `ExpnData`) from the current
309303
// session.
310304

311305
hygiene_encode_context.encode(
@@ -428,7 +422,6 @@ impl OnDiskCache {
428422
syntax_contexts: &self.syntax_contexts,
429423
expn_data: &self.expn_data,
430424
foreign_expn_data: &self.foreign_expn_data,
431-
hygiene_context: &self.hygiene_context,
432425
};
433426
f(&mut decoder)
434427
}
@@ -448,7 +441,6 @@ pub struct CacheDecoder<'a, 'tcx> {
448441
syntax_contexts: &'a FxHashMap<u32, AbsoluteBytePos>,
449442
expn_data: &'a UnhashMap<ExpnHash, AbsoluteBytePos>,
450443
foreign_expn_data: &'a UnhashMap<ExpnHash, u32>,
451-
hygiene_context: &'a HygieneDecodeContext,
452444
}
453445

454446
impl<'a, 'tcx> CacheDecoder<'a, 'tcx> {
@@ -561,12 +553,12 @@ impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for Vec<u8> {
561553
impl<'a, 'tcx> SpanDecoder for CacheDecoder<'a, 'tcx> {
562554
fn decode_syntax_context(&mut self) -> SyntaxContext {
563555
let syntax_contexts = self.syntax_contexts;
564-
rustc_span::hygiene::decode_syntax_context(self, self.hygiene_context, |this, id| {
556+
rustc_span::hygiene::decode_syntax_context(self, |this, id| {
565557
// This closure is invoked if we haven't already decoded the data for the `SyntaxContext` we are deserializing.
566558
// We look up the position of the associated `SyntaxData` and decode it.
567559
let pos = syntax_contexts.get(&id).unwrap();
568560
this.with_position(pos.to_usize(), |decoder| {
569-
let data: SyntaxContextData = decode_tagged(decoder, TAG_SYNTAX_CONTEXT);
561+
let data: SyntaxContextKey = decode_tagged(decoder, TAG_SYNTAX_CONTEXT);
570562
data
571563
})
572564
})

compiler/rustc_middle/src/ty/parameterized.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ trivially_parameterized_over_tcx! {
111111
rustc_span::Span,
112112
rustc_span::Symbol,
113113
rustc_span::def_id::DefPathHash,
114-
rustc_span::hygiene::SyntaxContextData,
114+
rustc_span::hygiene::SyntaxContextKey,
115115
rustc_span::Ident,
116116
rustc_type_ir::Variance,
117117
rustc_hir::Attribute,

compiler/rustc_span/src/hygiene.rs

+14-137
Original file line numberDiff line numberDiff line change
@@ -24,17 +24,14 @@
2424
// because getting it wrong can lead to nested `HygieneData::with` calls that
2525
// trigger runtime aborts. (Fortunately these are obvious and easy to fix.)
2626

27-
use std::cell::RefCell;
28-
use std::collections::hash_map::Entry;
29-
use std::collections::hash_set::Entry as SetEntry;
3027
use std::hash::Hash;
3128
use std::sync::Arc;
3229
use std::{fmt, iter, mem};
3330

3431
use rustc_data_structures::fingerprint::Fingerprint;
3532
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
3633
use rustc_data_structures::stable_hasher::{HashStable, HashingControls, StableHasher};
37-
use rustc_data_structures::sync::{Lock, WorkerLocal};
34+
use rustc_data_structures::sync::Lock;
3835
use rustc_data_structures::unhash::UnhashMap;
3936
use rustc_hashes::Hash64;
4037
use rustc_index::IndexVec;
@@ -59,10 +56,10 @@ impl !PartialOrd for SyntaxContext {}
5956

6057
/// If this part of two syntax contexts is equal, then the whole syntax contexts should be equal.
6158
/// The other fields are only for caching.
62-
type SyntaxContextKey = (SyntaxContext, ExpnId, Transparency);
59+
pub type SyntaxContextKey = (SyntaxContext, ExpnId, Transparency);
6360

6461
#[derive(Clone, Copy, PartialEq, Debug, Encodable, Decodable)]
65-
pub struct SyntaxContextData {
62+
struct SyntaxContextData {
6663
outer_expn: ExpnId,
6764
outer_transparency: Transparency,
6865
parent: SyntaxContext,
@@ -1266,7 +1263,7 @@ impl HygieneEncodeContext {
12661263
pub fn encode<T>(
12671264
&self,
12681265
encoder: &mut T,
1269-
mut encode_ctxt: impl FnMut(&mut T, u32, &SyntaxContextData),
1266+
mut encode_ctxt: impl FnMut(&mut T, u32, &SyntaxContextKey),
12701267
mut encode_expn: impl FnMut(&mut T, ExpnId, &ExpnData, ExpnHash),
12711268
) {
12721269
// When we serialize a `SyntaxContextData`, we may end up serializing
@@ -1286,9 +1283,9 @@ impl HygieneEncodeContext {
12861283
// of the table that we insert data into doesn't depend on insertion
12871284
// order
12881285
#[allow(rustc::potential_query_instability)]
1289-
for_all_ctxts_in(latest_ctxts.into_iter(), |index, ctxt, data| {
1286+
for_all_ctxts_in(latest_ctxts.into_iter(), |index, ctxt, ctxt_key| {
12901287
if self.serialized_ctxts.lock().insert(ctxt) {
1291-
encode_ctxt(encoder, index, data);
1288+
encode_ctxt(encoder, index, ctxt_key);
12921289
}
12931290
});
12941291

@@ -1306,29 +1303,6 @@ impl HygieneEncodeContext {
13061303
}
13071304
}
13081305

1309-
#[derive(Default)]
1310-
/// Additional information used to assist in decoding hygiene data
1311-
struct HygieneDecodeContextInner {
1312-
// Maps serialized `SyntaxContext` ids to a `SyntaxContext` in the current
1313-
// global `HygieneData`. When we deserialize a `SyntaxContext`, we need to create
1314-
// a new id in the global `HygieneData`. This map tracks the ID we end up picking,
1315-
// so that multiple occurrences of the same serialized id are decoded to the same
1316-
// `SyntaxContext`. This only stores `SyntaxContext`s which are completely decoded.
1317-
remapped_ctxts: Vec<Option<SyntaxContext>>,
1318-
1319-
/// Maps serialized `SyntaxContext` ids that are currently being decoded to a `SyntaxContext`.
1320-
decoding: FxHashMap<u32, SyntaxContext>,
1321-
}
1322-
1323-
#[derive(Default)]
1324-
/// Additional information used to assist in decoding hygiene data
1325-
pub struct HygieneDecodeContext {
1326-
inner: Lock<HygieneDecodeContextInner>,
1327-
1328-
/// A set of serialized `SyntaxContext` ids that are currently being decoded on each thread.
1329-
local_in_progress: WorkerLocal<RefCell<FxHashSet<u32>>>,
1330-
}
1331-
13321306
/// Register an expansion which has been decoded from the on-disk-cache for the local crate.
13331307
pub fn register_local_expn_id(data: ExpnData, hash: ExpnHash) -> ExpnId {
13341308
HygieneData::with(|hygiene_data| {
@@ -1393,14 +1367,11 @@ pub fn decode_expn_id(
13931367
register_expn_id(krate, index, expn_data, hash)
13941368
}
13951369

1396-
// Decodes `SyntaxContext`, using the provided `HygieneDecodeContext`
1397-
// to track which `SyntaxContext`s we have already decoded.
1398-
// The provided closure will be invoked to deserialize a `SyntaxContextData`
1399-
// if we haven't already seen the id of the `SyntaxContext` we are deserializing.
1400-
pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContextData>(
1370+
// Decodes `SyntaxContext`.
1371+
// The provided closure will be invoked to deserialize a `SyntaxContextData`.
1372+
pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContextKey>(
14011373
d: &mut D,
1402-
context: &HygieneDecodeContext,
1403-
decode_data: F,
1374+
decode_ctxt_key: F,
14041375
) -> SyntaxContext {
14051376
let raw_id: u32 = Decodable::decode(d);
14061377
if raw_id == 0 {
@@ -1409,115 +1380,21 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
14091380
return SyntaxContext::root();
14101381
}
14111382

1412-
let pending_ctxt = {
1413-
let mut inner = context.inner.lock();
1414-
1415-
// Reminder: `HygieneDecodeContext` is per-crate, so there are no collisions between
1416-
// raw ids from different crate metadatas.
1417-
if let Some(ctxt) = inner.remapped_ctxts.get(raw_id as usize).copied().flatten() {
1418-
// This has already been decoded.
1419-
return ctxt;
1420-
}
1421-
1422-
match inner.decoding.entry(raw_id) {
1423-
Entry::Occupied(ctxt_entry) => {
1424-
let pending_ctxt = *ctxt_entry.get();
1425-
match context.local_in_progress.borrow_mut().entry(raw_id) {
1426-
// We're decoding this already on the current thread. Return here and let the
1427-
// function higher up the stack finish decoding to handle recursive cases.
1428-
// Hopefully having a `SyntaxContext` that refers to an incorrect data is ok
1429-
// during reminder of the decoding process, it's certainly not ok after the
1430-
// top level decoding function returns.
1431-
SetEntry::Occupied(..) => return pending_ctxt,
1432-
// Some other thread is currently decoding this.
1433-
// Race with it (alternatively we could wait here).
1434-
// We cannot return this value, unlike in the recursive case above, because it
1435-
// may expose a `SyntaxContext` pointing to incorrect data to arbitrary code.
1436-
SetEntry::Vacant(entry) => {
1437-
entry.insert();
1438-
pending_ctxt
1439-
}
1440-
}
1441-
}
1442-
Entry::Vacant(entry) => {
1443-
// We are the first thread to start decoding. Mark the current thread as being
1444-
// progress.
1445-
context.local_in_progress.borrow_mut().insert(raw_id);
1446-
1447-
// Allocate and store SyntaxContext id *before* calling the decoder function,
1448-
// as the SyntaxContextData may reference itself.
1449-
let new_ctxt = HygieneData::with(|hygiene_data| {
1450-
// Push a dummy SyntaxContextData to ensure that nobody else can get the
1451-
// same ID as us. This will be overwritten after call `decode_data`.
1452-
hygiene_data.syntax_context_data.push(SyntaxContextData::decode_placeholder());
1453-
SyntaxContext::from_usize(hygiene_data.syntax_context_data.len() - 1)
1454-
});
1455-
entry.insert(new_ctxt);
1456-
new_ctxt
1457-
}
1458-
}
1459-
};
1460-
14611383
// Don't try to decode data while holding the lock, since we need to
14621384
// be able to recursively decode a SyntaxContext
1463-
let ctxt_data = decode_data(d, raw_id);
1464-
let ctxt_key = ctxt_data.key();
1465-
1466-
let ctxt = HygieneData::with(|hygiene_data| {
1467-
match hygiene_data.syntax_context_map.get(&ctxt_key) {
1468-
// Ensure that syntax contexts are unique.
1469-
// If syntax contexts with the given key already exists, reuse it instead of
1470-
// using `pending_ctxt`.
1471-
// `pending_ctxt` will leave an unused hole in the vector of syntax contexts.
1472-
// Hopefully its value isn't stored anywhere during decoding and its dummy data
1473-
// is never accessed later. The `is_decode_placeholder` asserts on all
1474-
// accesses to syntax context data attempt to ensure it.
1475-
Some(&ctxt) => ctxt,
1476-
// This is a completely new context.
1477-
// Overwrite its placeholder data with our decoded data.
1478-
None => {
1479-
let ctxt_data_ref =
1480-
&mut hygiene_data.syntax_context_data[pending_ctxt.as_u32() as usize];
1481-
let prev_ctxt_data = mem::replace(ctxt_data_ref, ctxt_data);
1482-
// Reset `dollar_crate_name` so that it will be updated by `update_dollar_crate_names`.
1483-
// We don't care what the encoding crate set this to - we want to resolve it
1484-
// from the perspective of the current compilation session.
1485-
ctxt_data_ref.dollar_crate_name = kw::DollarCrate;
1486-
// Make sure nothing weird happened while `decode_data` was running.
1487-
if !prev_ctxt_data.is_decode_placeholder() {
1488-
// Another thread may have already inserted the decoded data,
1489-
// but the decoded data should match.
1490-
assert_eq!(prev_ctxt_data, *ctxt_data_ref);
1491-
}
1492-
hygiene_data.syntax_context_map.insert(ctxt_key, pending_ctxt);
1493-
pending_ctxt
1494-
}
1495-
}
1496-
});
1497-
1498-
// Mark the context as completed
1499-
context.local_in_progress.borrow_mut().remove(&raw_id);
1500-
1501-
let mut inner = context.inner.lock();
1502-
let new_len = raw_id as usize + 1;
1503-
if inner.remapped_ctxts.len() < new_len {
1504-
inner.remapped_ctxts.resize(new_len, None);
1505-
}
1506-
inner.remapped_ctxts[raw_id as usize] = Some(ctxt);
1507-
inner.decoding.remove(&raw_id);
1508-
1509-
ctxt
1385+
let (parent, expn_id, transparency) = decode_ctxt_key(d, raw_id);
1386+
HygieneData::with(|hygiene_data| hygiene_data.alloc_ctxt(parent, expn_id, transparency))
15101387
}
15111388

1512-
fn for_all_ctxts_in<F: FnMut(u32, SyntaxContext, &SyntaxContextData)>(
1389+
fn for_all_ctxts_in<F: FnMut(u32, SyntaxContext, &SyntaxContextKey)>(
15131390
ctxts: impl Iterator<Item = SyntaxContext>,
15141391
mut f: F,
15151392
) {
15161393
let all_data: Vec<_> = HygieneData::with(|data| {
15171394
ctxts.map(|ctxt| (ctxt, data.syntax_context_data[ctxt.0 as usize].clone())).collect()
15181395
});
15191396
for (ctxt, data) in all_data.into_iter() {
1520-
f(ctxt.0, ctxt, &data);
1397+
f(ctxt.0, ctxt, &data.key());
15211398
}
15221399
}
15231400

0 commit comments

Comments
 (0)