Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 19cab6b

Browse files
committed
Auto merge of #130324 - petrochenkov:ctxtache, r=oli-obk
hygiene: Ensure uniqueness of `SyntaxContextData`s `SyntaxContextData`s are basically interned with `SyntaxContext`s working as indices, so they are supposed to be unique. However, currently duplicate `SyntaxContextData`s can be created during decoding from metadata or incremental cache. This PR fixes that. cc #129827 (comment)
2 parents f1bc669 + 07328d5 commit 19cab6b

File tree

1 file changed

+120
-73
lines changed

1 file changed

+120
-73
lines changed

‎compiler/rustc_span/src/hygiene.rs‎

Lines changed: 120 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@
2727
use std::cell::RefCell;
2828
use std::collections::hash_map::Entry;
2929
use std::collections::hash_set::Entry as SetEntry;
30-
use std::fmt;
3130
use std::hash::Hash;
3231
use std::sync::Arc;
32+
use std::{fmt, iter, mem};
3333

3434
use rustc_data_structures::fingerprint::Fingerprint;
3535
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
@@ -57,7 +57,11 @@ pub struct SyntaxContext(u32);
5757
impl !Ord for SyntaxContext {}
5858
impl !PartialOrd for SyntaxContext {}
5959

60-
#[derive(Debug, Encodable, Decodable, Clone)]
60+
/// If this part of two syntax contexts is equal, then the whole syntax contexts should be equal.
61+
/// The other fields are only for caching.
62+
type SyntaxContextKey = (SyntaxContext, ExpnId, Transparency);
63+
64+
#[derive(Clone, Copy, PartialEq, Debug, Encodable, Decodable)]
6165
pub struct SyntaxContextData {
6266
outer_expn: ExpnId,
6367
outer_transparency: Transparency,
@@ -70,6 +74,31 @@ pub struct SyntaxContextData {
7074
dollar_crate_name: Symbol,
7175
}
7276

77+
impl SyntaxContextData {
78+
fn root() -> SyntaxContextData {
79+
SyntaxContextData {
80+
outer_expn: ExpnId::root(),
81+
outer_transparency: Transparency::Opaque,
82+
parent: SyntaxContext::root(),
83+
opaque: SyntaxContext::root(),
84+
opaque_and_semitransparent: SyntaxContext::root(),
85+
dollar_crate_name: kw::DollarCrate,
86+
}
87+
}
88+
89+
fn decode_placeholder() -> SyntaxContextData {
90+
SyntaxContextData { dollar_crate_name: kw::Empty, ..SyntaxContextData::root() }
91+
}
92+
93+
fn is_decode_placeholder(&self) -> bool {
94+
self.dollar_crate_name == kw::Empty
95+
}
96+
97+
fn key(&self) -> SyntaxContextKey {
98+
(self.parent, self.outer_expn, self.outer_transparency)
99+
}
100+
}
101+
73102
rustc_index::newtype_index! {
74103
/// A unique ID associated with a macro invocation and expansion.
75104
#[orderable]
@@ -342,7 +371,7 @@ pub(crate) struct HygieneData {
342371
foreign_expn_hashes: FxHashMap<ExpnId, ExpnHash>,
343372
expn_hash_to_expn_id: UnhashMap<ExpnHash, ExpnId>,
344373
syntax_context_data: Vec<SyntaxContextData>,
345-
syntax_context_map: FxHashMap<(SyntaxContext,ExpnId,Transparency), SyntaxContext>,
374+
syntax_context_map: FxHashMap<SyntaxContextKey, SyntaxContext>,
346375
/// Maps the `local_hash` of an `ExpnData` to the next disambiguator value.
347376
/// This is used by `update_disambiguator` to keep track of which `ExpnData`s
348377
/// would have collisions without a disambiguator.
@@ -361,22 +390,16 @@ impl HygieneData {
361390
None,
362391
);
363392

393+
let root_ctxt_data = SyntaxContextData::root();
364394
HygieneData {
365395
local_expn_data: IndexVec::from_elem_n(Some(root_data), 1),
366396
local_expn_hashes: IndexVec::from_elem_n(ExpnHash(Fingerprint::ZERO), 1),
367397
foreign_expn_data: FxHashMap::default(),
368398
foreign_expn_hashes: FxHashMap::default(),
369-
expn_hash_to_expn_id: std::iter::once((ExpnHash(Fingerprint::ZERO), ExpnId::root()))
399+
expn_hash_to_expn_id: iter::once((ExpnHash(Fingerprint::ZERO), ExpnId::root()))
370400
.collect(),
371-
syntax_context_data: vec![SyntaxContextData {
372-
outer_expn: ExpnId::root(),
373-
outer_transparency: Transparency::Opaque,
374-
parent: SyntaxContext(0),
375-
opaque: SyntaxContext(0),
376-
opaque_and_semitransparent: SyntaxContext(0),
377-
dollar_crate_name: kw::DollarCrate,
378-
}],
379-
syntax_context_map: FxHashMap::default(),
401+
syntax_context_data: vec![root_ctxt_data],
402+
syntax_context_map: iter::once((root_ctxt_data.key(), SyntaxContext(0))).collect(),
380403
expn_data_disambiguators: UnhashMap::default(),
381404
}
382405
}
@@ -425,23 +448,28 @@ impl HygieneData {
425448
}
426449

427450
fn normalize_to_macros_2_0(&self, ctxt: SyntaxContext) -> SyntaxContext {
451+
debug_assert!(!self.syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
428452
self.syntax_context_data[ctxt.0 as usize].opaque
429453
}
430454

431455
fn normalize_to_macro_rules(&self, ctxt: SyntaxContext) -> SyntaxContext {
456+
debug_assert!(!self.syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
432457
self.syntax_context_data[ctxt.0 as usize].opaque_and_semitransparent
433458
}
434459

435460
fn outer_expn(&self, ctxt: SyntaxContext) -> ExpnId {
461+
debug_assert!(!self.syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
436462
self.syntax_context_data[ctxt.0 as usize].outer_expn
437463
}
438464

439465
fn outer_mark(&self, ctxt: SyntaxContext) -> (ExpnId, Transparency) {
466+
debug_assert!(!self.syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
440467
let data = &self.syntax_context_data[ctxt.0 as usize];
441468
(data.outer_expn, data.outer_transparency)
442469
}
443470

444471
fn parent_ctxt(&self, ctxt: SyntaxContext) -> SyntaxContext {
472+
debug_assert!(!self.syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
445473
self.syntax_context_data[ctxt.0 as usize].parent
446474
}
447475

@@ -551,6 +579,7 @@ impl HygieneData {
551579
transparency: Transparency,
552580
) -> SyntaxContext {
553581
let syntax_context_data = &mut self.syntax_context_data;
582+
debug_assert!(!syntax_context_data[ctxt.0 as usize].is_decode_placeholder());
554583
let mut opaque = syntax_context_data[ctxt.0 as usize].opaque;
555584
let mut opaque_and_semitransparent =
556585
syntax_context_data[ctxt.0 as usize].opaque_and_semitransparent;
@@ -561,7 +590,7 @@ impl HygieneData {
561590
.syntax_context_map
562591
.entry((parent, expn_id, transparency))
563592
.or_insert_with(|| {
564-
let new_opaque = SyntaxContext(syntax_context_data.len()asu32);
593+
let new_opaque = SyntaxContext::from_usize(syntax_context_data.len());
565594
syntax_context_data.push(SyntaxContextData {
566595
outer_expn: expn_id,
567596
outer_transparency: transparency,
@@ -581,7 +610,7 @@ impl HygieneData {
581610
.entry((parent, expn_id, transparency))
582611
.or_insert_with(|| {
583612
let new_opaque_and_semitransparent =
584-
SyntaxContext(syntax_context_data.len()asu32);
613+
SyntaxContext::from_usize(syntax_context_data.len());
585614
syntax_context_data.push(SyntaxContextData {
586615
outer_expn: expn_id,
587616
outer_transparency: transparency,
@@ -596,8 +625,6 @@ impl HygieneData {
596625

597626
let parent = ctxt;
598627
*self.syntax_context_map.entry((parent, expn_id, transparency)).or_insert_with(|| {
599-
let new_opaque_and_semitransparent_and_transparent =
600-
SyntaxContext(syntax_context_data.len() as u32);
601628
syntax_context_data.push(SyntaxContextData {
602629
outer_expn: expn_id,
603630
outer_transparency: transparency,
@@ -606,7 +633,7 @@ impl HygieneData {
606633
opaque_and_semitransparent,
607634
dollar_crate_name: kw::DollarCrate,
608635
});
609-
new_opaque_and_semitransparent_and_transparent
636+
SyntaxContext::from_usize(syntax_context_data.len() - 1)
610637
})
611638
}
612639
}
@@ -626,25 +653,26 @@ pub fn walk_chain_collapsed(span: Span, to: Span) -> Span {
626653

627654
pub fn update_dollar_crate_names(mut get_name: impl FnMut(SyntaxContext) -> Symbol) {
628655
// The new contexts that need updating are at the end of the list and have `$crate` as a name.
629-
let (len, to_update) = HygieneData::with(|data| {
630-
(
631-
data.syntax_context_data.len(),
632-
data.syntax_context_data
633-
.iter()
634-
.rev()
635-
.take_while(|scdata| scdata.dollar_crate_name == kw::DollarCrate)
636-
.count(),
637-
)
656+
// Also decoding placeholders can be encountered among both old and new contexts.
657+
let mut to_update = vec![];
658+
HygieneData::with(|data| {
659+
for (idx, scdata) in data.syntax_context_data.iter().enumerate().rev() {
660+
if scdata.dollar_crate_name == kw::DollarCrate {
661+
to_update.push((idx, kw::DollarCrate));
662+
} else if !scdata.is_decode_placeholder() {
663+
break;
664+
}
665+
}
638666
});
639667
// The callback must be called from outside of the `HygieneData` lock,
640668
// since it will try to acquire it too.
641-
let range_to_update = len - to_update..len;
642-
let names:Vec<_> =
643-
range_to_update.clone().map(|idx| get_name(SyntaxContext::from_u32(idx asu32))).collect();
669+
for(idx, name)in&mut to_update{
670+
*name = get_name(SyntaxContext::from_usize(*idx));
671+
}
644672
HygieneData::with(|data| {
645-
range_to_update.zip(names).for_each(|(idx, name)| {
673+
for(idx, name)in to_update {
646674
data.syntax_context_data[idx].dollar_crate_name = name;
647-
})
675+
}
648676
})
649677
}
650678

@@ -713,6 +741,10 @@ impl SyntaxContext {
713741
SyntaxContext(raw as u32)
714742
}
715743

744+
fn from_usize(raw: usize) -> SyntaxContext {
745+
SyntaxContext(u32::try_from(raw).unwrap())
746+
}
747+
716748
/// Extend a syntax context with a given expansion and transparency.
717749
pub fn apply_mark(self, expn_id: ExpnId, transparency: Transparency) -> SyntaxContext {
718750
HygieneData::with(|data| data.apply_mark(self, expn_id, transparency))
@@ -893,7 +925,10 @@ impl SyntaxContext {
893925
}
894926

895927
pub(crate) fn dollar_crate_name(self) -> Symbol {
896-
HygieneData::with(|data| data.syntax_context_data[self.0 as usize].dollar_crate_name)
928+
HygieneData::with(|data| {
929+
debug_assert!(!data.syntax_context_data[self.0 as usize].is_decode_placeholder());
930+
data.syntax_context_data[self.0 as usize].dollar_crate_name
931+
})
897932
}
898933

899934
pub fn edition(self) -> Edition {
@@ -1244,7 +1279,7 @@ impl HygieneEncodeContext {
12441279

12451280
// Consume the current round of SyntaxContexts.
12461281
// Drop the lock() temporary early
1247-
let latest_ctxts = { std::mem::take(&mut *self.latest_ctxts.lock()) };
1282+
let latest_ctxts = { mem::take(&mut *self.latest_ctxts.lock()) };
12481283

12491284
// It's fine to iterate over a HashMap, because the serialization
12501285
// of the table that we insert data into doesn't depend on insertion
@@ -1256,7 +1291,7 @@ impl HygieneEncodeContext {
12561291
}
12571292
});
12581293

1259-
let latest_expns = { std::mem::take(&mut *self.latest_expns.lock()) };
1294+
let latest_expns = { mem::take(&mut *self.latest_expns.lock()) };
12601295

12611296
// Same as above, this is fine as we are inserting into a order-independent hashset
12621297
#[allow(rustc::potential_query_instability)]
@@ -1373,28 +1408,33 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
13731408
return SyntaxContext::root();
13741409
}
13751410

1376-
let ctxt = {
1411+
let pending_ctxt = {
13771412
let mut inner = context.inner.lock();
13781413

1414+
// Reminder: `HygieneDecodeContext` is per-crate, so there are no collisions between
1415+
// raw ids from different crate metadatas.
13791416
if let Some(ctxt) = inner.remapped_ctxts.get(raw_id as usize).copied().flatten() {
13801417
// This has already been decoded.
13811418
return ctxt;
13821419
}
13831420

13841421
match inner.decoding.entry(raw_id) {
13851422
Entry::Occupied(ctxt_entry) => {
1423+
let pending_ctxt = *ctxt_entry.get();
13861424
match context.local_in_progress.borrow_mut().entry(raw_id) {
1387-
SetEntry::Occupied(..) => {
1388-
// We're decoding this already on the current thread. Return here
1389-
// and let the function higher up the stack finish decoding to handle
1390-
// recursive cases.
1391-
return *ctxt_entry.get();
1392-
}
1425+
// We're decoding this already on the current thread. Return here and let the
1426+
// function higher up the stack finish decoding to handle recursive cases.
1427+
// Hopefully having a `SyntaxContext` that refers to an incorrect data is ok
1428+
// during reminder of the decoding process, it's certainly not ok after the
1429+
// top level decoding function returns.
1430+
SetEntry::Occupied(..) => return pending_ctxt,
1431+
// Some other thread is currently decoding this.
1432+
// Race with it (alternatively we could wait here).
1433+
// We cannot return this value, unlike in the recursive case above, because it
1434+
// may expose a `SyntaxContext` pointing to incorrect data to arbitrary code.
13931435
SetEntry::Vacant(entry) => {
13941436
entry.insert();
1395-
1396-
// Some other thread is current decoding this. Race with it.
1397-
*ctxt_entry.get()
1437+
pending_ctxt
13981438
}
13991439
}
14001440
}
@@ -1405,18 +1445,10 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
14051445
// Allocate and store SyntaxContext id *before* calling the decoder function,
14061446
// as the SyntaxContextData may reference itself.
14071447
let new_ctxt = HygieneData::with(|hygiene_data| {
1408-
let new_ctxt = SyntaxContext(hygiene_data.syntax_context_data.len() as u32);
14091448
// Push a dummy SyntaxContextData to ensure that nobody else can get the
1410-
// same ID as us. This will be overwritten after call `decode_Data`
1411-
hygiene_data.syntax_context_data.push(SyntaxContextData {
1412-
outer_expn: ExpnId::root(),
1413-
outer_transparency: Transparency::Transparent,
1414-
parent: SyntaxContext::root(),
1415-
opaque: SyntaxContext::root(),
1416-
opaque_and_semitransparent: SyntaxContext::root(),
1417-
dollar_crate_name: kw::Empty,
1418-
});
1419-
new_ctxt
1449+
// same ID as us. This will be overwritten after call `decode_data`.
1450+
hygiene_data.syntax_context_data.push(SyntaxContextData::decode_placeholder());
1451+
SyntaxContext::from_usize(hygiene_data.syntax_context_data.len() - 1)
14201452
});
14211453
entry.insert(new_ctxt);
14221454
new_ctxt
@@ -1426,27 +1458,42 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
14261458

14271459
// Don't try to decode data while holding the lock, since we need to
14281460
// be able to recursively decode a SyntaxContext
1429-
let mut ctxt_data = decode_data(d, raw_id);
1430-
// Reset `dollar_crate_name` so that it will be updated by `update_dollar_crate_names`
1431-
// We don't care what the encoding crate set this to - we want to resolve it
1432-
// from the perspective of the current compilation session
1433-
ctxt_data.dollar_crate_name = kw::DollarCrate;
1434-
1435-
// Overwrite the dummy data with our decoded SyntaxContextData
1436-
HygieneData::with(|hygiene_data| {
1437-
if let Some(old) = hygiene_data.syntax_context_data.get(raw_id as usize)
1438-
&& old.outer_expn == ctxt_data.outer_expn
1439-
&& old.outer_transparency == ctxt_data.outer_transparency
1440-
&& old.parent == ctxt_data.parent
1441-
{
1442-
ctxt_data = old.clone();
1461+
let ctxt_data = decode_data(d, raw_id);
1462+
let ctxt_key = ctxt_data.key();
1463+
1464+
let ctxt = HygieneData::with(|hygiene_data| {
1465+
match hygiene_data.syntax_context_map.get(&ctxt_key) {
1466+
// Ensure that syntax contexts are unique.
1467+
// If syntax contexts with the given key already exists, reuse it instead of
1468+
// using `pending_ctxt`.
1469+
// `pending_ctxt` will leave an unused hole in the vector of syntax contexts.
1470+
// Hopefully its value isn't stored anywhere during decoding and its dummy data
1471+
// is never accessed later. The `is_decode_placeholder` asserts on all
1472+
// accesses to syntax context data attempt to ensure it.
1473+
Some(&ctxt) => ctxt,
1474+
// This is a completely new context.
1475+
// Overwrite its placeholder data with our decoded data.
1476+
None => {
1477+
let ctxt_data_ref =
1478+
&mut hygiene_data.syntax_context_data[pending_ctxt.as_u32() as usize];
1479+
let prev_ctxt_data = mem::replace(ctxt_data_ref, ctxt_data);
1480+
// Reset `dollar_crate_name` so that it will be updated by `update_dollar_crate_names`.
1481+
// We don't care what the encoding crate set this to - we want to resolve it
1482+
// from the perspective of the current compilation session.
1483+
ctxt_data_ref.dollar_crate_name = kw::DollarCrate;
1484+
// Make sure nothing weird happened while `decode_data` was running.
1485+
if !prev_ctxt_data.is_decode_placeholder() {
1486+
// Another thread may have already inserted the decoded data,
1487+
// but the decoded data should match.
1488+
assert_eq!(prev_ctxt_data, *ctxt_data_ref);
1489+
}
1490+
hygiene_data.syntax_context_map.insert(ctxt_key, pending_ctxt);
1491+
pending_ctxt
1492+
}
14431493
}
1444-
1445-
hygiene_data.syntax_context_data[ctxt.as_u32() as usize] = ctxt_data;
14461494
});
14471495

14481496
// Mark the context as completed
1449-
14501497
context.local_in_progress.borrow_mut().remove(&raw_id);
14511498

14521499
let mut inner = context.inner.lock();

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /