@@ -2,13 +2,16 @@ use crate::dep_graph::DepNodeIndex;

use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sharded::{self, Sharded};
- use rustc_data_structures::sync::{Lock, OnceLock};
+ use rustc_data_structures::sync::{AtomicUsize, OnceLock};
use rustc_hir::def_id::LOCAL_CRATE;
- use rustc_index::{Idx, IndexVec};
+ use rustc_index::Idx;
use rustc_span::def_id::DefId;
use rustc_span::def_id::DefIndex;
use std::fmt::Debug;
use std::hash::Hash;
+ use std::marker::PhantomData;
+ use std::mem::offset_of;
+ use std::sync::atomic::{AtomicPtr, AtomicU32, Ordering};

pub trait QueryCache: Sized {
    type Key: Hash + Eq + Copy + Debug;
@@ -100,13 +103,201 @@ where
    }
}

- pub struct VecCache<K: Idx, V> {
-     cache: Lock<IndexVec<K, Option<(V, DepNodeIndex)>>>,
+ struct Slot<V> {
+     // We never construct `&Slot<V>`, so it's fine for this not to be in an `UnsafeCell`.
+     value: V,
+     // This is both an index and a once-lock.
+     //
+     // 0: not yet initialized.
+     // 1: lock held, initializing.
+     // 2 and above: initialized; the stored value is the index plus 2.
+     index_and_lock: AtomicU32,
}
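// A minimal decoding sketch (illustrative only, not code from this patch) of how a reader
// interprets the three states above:
//
//     match slot.index_and_lock.load(Ordering::Acquire) {
//         0 | 1 => None,        // empty, or another thread is still initializing
//         n => Some(n - 2),     // initialized: the stored value is the index plus 2
//     }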

impl<K: Idx, V> Default for VecCache<K, V> {
    fn default() -> Self {
-         VecCache { cache: Default::default() }
+         VecCache {
+             buckets: Default::default(),
+             key: PhantomData,
+             len: Default::default(),
+             present: Default::default(),
+         }
+     }
+ }
+
+ #[derive(Copy, Clone, Debug)]
+ struct SlotIndex {
+     bucket_idx: usize,
+     entries: usize,
+     index_in_bucket: usize,
+ }
+
+ impl SlotIndex {
+     #[inline]
+     fn from_index(idx: u32) -> Self {
+         let mut bucket = idx.checked_ilog2().unwrap_or(0) as usize;
+         let entries;
+         let running_sum;
+         if bucket <= 11 {
+             entries = 1 << 12;
+             running_sum = 0;
+             bucket = 0;
+         } else {
+             entries = 1 << bucket;
+             running_sum = entries;
+             bucket = bucket - 11;
+         }
+         SlotIndex { bucket_idx: bucket, entries, index_in_bucket: idx as usize - running_sum }
+     }
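    // A worked sketch of the mapping `from_index` computes (these values follow directly from
    // the arithmetic above; bucket 0 absorbs every index with ilog2 <= 11):
    //
    //     from_index(0)        -> SlotIndex { bucket_idx: 0,  entries: 4096,    index_in_bucket: 0 }
    //     from_index(4095)     -> SlotIndex { bucket_idx: 0,  entries: 4096,    index_in_bucket: 4095 }
    //     from_index(4096)     -> SlotIndex { bucket_idx: 1,  entries: 4096,    index_in_bucket: 0 }
    //     from_index(8192)     -> SlotIndex { bucket_idx: 2,  entries: 8192,    index_in_bucket: 0 }
    //     from_index(u32::MAX) -> SlotIndex { bucket_idx: 20, entries: 1 << 31, index_in_bucket: (1 << 31) - 1 }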
+
+     #[inline]
+     unsafe fn get<V: Copy>(&self, buckets: &[AtomicPtr<Slot<V>>; 21]) -> Option<(V, u32)> {
+         // SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 20, i.e.,
+         // in-bounds of `buckets`. See `from_index` for the computation.
+         let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
+         let ptr = bucket.load(Ordering::Acquire);
+         // Bucket is not yet initialized: then we obviously won't find this entry in that bucket.
+         if ptr.is_null() {
+             return None;
+         }
+         // SAFETY: Follows from preconditions on `buckets` and `self`.
+         let slot = unsafe { ptr.add(self.index_in_bucket) };
+
+         // SAFETY: The bucket was allocated zero-initialized with room for `entries` slots and
+         // `index_in_bucket` is in-bounds, so `slot` is valid; `index_and_lock` is an `AtomicU32`,
+         // for which the all-zero bit pattern is valid, so forming a shared reference is sound.
+         let index_and_lock =
+             unsafe { &*slot.byte_add(offset_of!(Slot<V>, index_and_lock)).cast::<AtomicU32>() };
+         let current = index_and_lock.load(Ordering::Acquire);
+         let index = match current {
+             0 => return None,
+             // Treat "initializing" as actually just not initialized at all.
+             // The only reason this is a separate state is that `complete` calls could race and
+             // we can't allow that, but from the load perspective there's no difference.
+             1 => return None,
+             _ => current - 2,
+         };
+
+         // SAFETY:
+         // * `slot` is a valid pointer (buckets are always valid for the index we get).
+         // * `value` is initialized, since we saw a >= 2 index above.
+         // * `V: Copy`, so it is safe to read.
+         let value = unsafe { slot.byte_add(offset_of!(Slot<V>, value)).cast::<V>().read() };
+         Some((value, index))
+     }
+
+     /// Returns true if this call successfully put the value into the map.
+     #[inline]
+     fn put<V>(&self, buckets: &[AtomicPtr<Slot<V>>; 21], value: V, extra: u32) -> bool {
+         static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
+
+         // SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 20, i.e.,
+         // in-bounds of `buckets`.
+         let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
+         let mut ptr = bucket.load(Ordering::Acquire);
+         let _allocator_guard;
+         if ptr.is_null() {
+             // If we load null, then acquire the global lock; this path is quite cold, so it's
+             // cheap to use a global lock.
+             _allocator_guard = LOCK.lock();
+             // And re-load the value.
+             ptr = bucket.load(Ordering::Acquire);
+         }
+
+         // OK, now under the allocator lock: if we're still null, then it's definitely us that
+         // will initialize this bucket.
+         if ptr.is_null() {
+             let bucket_layout =
+                 std::alloc::Layout::array::<Slot<V>>(self.entries as usize).unwrap();
+             // SAFETY: Always >0 entries in each bucket.
+             let allocated = unsafe { std::alloc::alloc_zeroed(bucket_layout).cast::<Slot<V>>() };
+             if allocated.is_null() {
+                 std::alloc::handle_alloc_error(bucket_layout);
+             }
+             bucket.store(allocated, Ordering::Release);
+             ptr = allocated;
+         }
+         assert!(!ptr.is_null());
+
+         // SAFETY: `index_in_bucket` is always in-bounds of the allocated array.
+         let slot = unsafe { ptr.add(self.index_in_bucket) };
+
+         // SAFETY: As in `get`, the bucket is zero-initialized and `slot` is in-bounds, so this
+         // points at a valid `AtomicU32`.
+         let index_and_lock =
+             unsafe { &*slot.byte_add(offset_of!(Slot<V>, index_and_lock)).cast::<AtomicU32>() };
+         match index_and_lock.compare_exchange(0, 1, Ordering::Relaxed, Ordering::Relaxed) {
+             Ok(_) => {
+                 // We have acquired the initialization lock. It is our job to write `value` and
+                 // then set the lock to the real index.
+
+                 unsafe {
+                     slot.byte_add(offset_of!(Slot<V>, value)).cast::<V>().write(value);
+                 }
+
+                 index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release);
+
+                 true
+             }
+
+             // Treat "initializing" as actually initialized: we lost the race and should skip
+             // any updates to this slot. In practice this should be unreachable, since we're
+             // guarded by an external lock that only allows one initialization for any given
+             // query result.
+             Err(1) => unreachable!(),
+
+             // This slot was already populated. Also ignore; currently this is the same as
+             // "initializing".
+             Err(_) => false,
+         }
+     }
+ }
+
+ pub struct VecCache<K: Idx, V> {
+     // Entries per bucket:
+     // Bucket 0:  4096 (2^12)
+     // Bucket 1:  4096 (2^12)
+     // Bucket 2:  8192
+     // Bucket 3: 16384
+     // ...
+     // Bucket 19: 1073741824
+     // Bucket 20: 2147483648
+     // If all buckets are initialized, there is one slot for every possible u32 key (2^32 total).
+     buckets: [AtomicPtr<Slot<V>>; 21],
+
+     // `present` and `len` are only used during incremental compilation and self-profiling.
+     // They are an optimization over iterating the full `buckets` array.
+     present: [AtomicPtr<Slot<()>>; 21],
+     len: AtomicUsize,
+
+     key: PhantomData<K>,
+ }
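// A quick check of the capacity claim above, worked out from the bucket sizes listed in the
// comment (buckets 0 and 1 hold 2^12 entries each, buckets 2..=20 hold 2^13 ..= 2^31):
//
//       2^12 + (2^12 + 2^13 + ... + 2^31)
//     = 2^12 + (2^32 - 2^12)
//     = 2^32, i.e. one slot per possible u32 key.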
+
+ // SAFETY: No access to `V` is made.
+ unsafe impl<K: Idx, #[may_dangle] V> Drop for VecCache<K, V> {
+     fn drop(&mut self) {
+         // We have unique ownership, so no locks etc. are needed. Since `K` and `V` are both
+         // `Copy`, we are also guaranteed to just need to deallocate any large arrays (not
+         // iterate over contents).
+         //
+         // Each bucket must be deallocated with the same layout it was allocated with in `put`:
+         // buckets 0 and 1 hold 2^12 entries each, and every later bucket doubles (mirroring
+         // `SlotIndex::from_index`).
+         for (i, bucket) in self.buckets.iter().enumerate() {
+             let bucket = bucket.load(Ordering::Acquire);
+             if !bucket.is_null() {
+                 let entries = if i == 0 { 1 << 12 } else { 1 << (i + 11) };
+                 let layout = std::alloc::Layout::array::<Slot<V>>(entries).unwrap();
+                 unsafe {
+                     std::alloc::dealloc(bucket.cast(), layout);
+                 }
+             }
+         }
+
+         // The `present` buckets were allocated as arrays of `Slot<()>` (see `complete`), so
+         // they must be deallocated with that element type as well.
+         for (i, bucket) in self.present.iter().enumerate() {
+             let bucket = bucket.load(Ordering::Acquire);
+             if !bucket.is_null() {
+                 let entries = if i == 0 { 1 << 12 } else { 1 << (i + 11) };
+                 let layout = std::alloc::Layout::array::<Slot<()>>(entries).unwrap();
+                 unsafe {
+                     std::alloc::dealloc(bucket.cast(), layout);
+                 }
+             }
+         }
    }
}
@@ -120,20 +311,41 @@ where

    #[inline(always)]
    fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
-         let lock = self.cache.lock();
-         if let Some(Some(value)) = lock.get(*key) { Some(*value) } else { None }
+         let key = u32::try_from(key.index()).unwrap();
+         let slot_idx = SlotIndex::from_index(key);
+         match unsafe { slot_idx.get(&self.buckets) } {
+             Some((value, idx)) => Some((value, DepNodeIndex::from_u32(idx))),
+             None => None,
+         }
    }

    #[inline]
    fn complete(&self, key: K, value: V, index: DepNodeIndex) {
-         let mut lock = self.cache.lock();
-         lock.insert(key, (value, index));
+         let key = u32::try_from(key.index()).unwrap();
+         let slot_idx = SlotIndex::from_index(key);
+         if slot_idx.put(&self.buckets, value, index.index() as u32) {
+             let present_idx = self.len.fetch_add(1, Ordering::Relaxed);
+             let slot = SlotIndex::from_index(present_idx as u32);
+             // We should always be uniquely putting due to `len` fetch_add returning unique values.
+             assert!(slot.put(&self.present, (), key));
+         }
    }

    fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
-         for (k, v) in self.cache.lock().iter_enumerated() {
-             if let Some(v) = v {
-                 f(&k, &v.0, v.1);
+         for idx in 0..self.len.load(Ordering::Relaxed) {
+             let key = SlotIndex::from_index(idx as u32);
+             match unsafe { key.get(&self.present) } {
+                 // This shouldn't happen in our current usage (iter is really only used long
+                 // after queries are done running), but if we hit this in practice it's
+                 // probably fine to just break early.
+                 None => unreachable!(),
+                 Some(((), key)) => {
+                     let key = K::new(key as usize);
+                     // unwrap() is OK: present entries are always written only after we put the
+                     // real entry.
+                     let value = self.lookup(&key).unwrap();
+                     f(&key, &value.0, value.1);
+                 }
            }
        }
    }
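    // An illustrative trace (not code from this patch) of how `present`/`len` drive `iter`,
    // assuming keys 7 and 5000 are completed in that order:
    //
    //     complete(7, a, i1)    -> buckets slot for 7 filled;    present slot 0 records key 7;    len = 1
    //     complete(5000, b, i2) -> buckets slot for 5000 filled; present slot 1 records key 5000; len = 2
    //     iter(f)               -> reads present[0..2], looks up keys 7 and 5000, calls f for each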
@@ -142,10 +354,7 @@ where
pub struct DefIdCache<V> {
    /// Stores the local DefIds in a dense map. Local queries are much more often dense, so this is
    /// a win over hashing query keys at marginal memory cost (~5% at most) compared to FxHashMap.
-     ///
-     /// The second element of the tuple is the set of keys actually present in the IndexVec, used
-     /// for faster iteration in `iter()`.
-     local: Lock<(IndexVec<DefIndex, Option<(V, DepNodeIndex)>>, Vec<DefIndex>)>,
+     local: VecCache<DefIndex, V>,
    foreign: DefaultCache<DefId, V>,
}
@@ -165,8 +374,7 @@ where
    #[inline(always)]
    fn lookup(&self, key: &DefId) -> Option<(V, DepNodeIndex)> {
        if key.krate == LOCAL_CRATE {
-             let cache = self.local.lock();
-             cache.0.get(key.index).and_then(|v| *v)
+             self.local.lookup(&key.index)
        } else {
            self.foreign.lookup(key)
        }
@@ -175,27 +383,19 @@ where
    #[inline]
    fn complete(&self, key: DefId, value: V, index: DepNodeIndex) {
        if key.krate == LOCAL_CRATE {
-             let mut cache = self.local.lock();
-             let (cache, present) = &mut *cache;
-             let slot = cache.ensure_contains_elem(key.index, Default::default);
-             if slot.is_none() {
-                 // FIXME: Only store the present set when running in incremental mode. `iter` is
-                 // not used outside of saving caches to disk and self-profile.
-                 present.push(key.index);
-             }
-             *slot = Some((value, index));
+             self.local.complete(key.index, value, index)
        } else {
            self.foreign.complete(key, value, index)
        }
    }

    fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
-         let guard = self.local.lock();
-         let (cache, present) = &*guard;
-         for &idx in present.iter() {
-             let value = cache[idx].unwrap();
-             f(&DefId { krate: LOCAL_CRATE, index: idx }, &value.0, value.1);
-         }
+         self.local.iter(&mut |key, value, index| {
+             f(&DefId { krate: LOCAL_CRATE, index: *key }, value, index);
+         });
        self.foreign.iter(f);
    }
}
+
+ #[cfg(test)]
+ mod tests;
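
The contents of the new `tests` module are not included in this diff. Purely as an illustrative
sketch (not the actual `tests.rs`), a round-trip test through the `QueryCache` trait might look
roughly like this, assuming the usual `from_u32` constructors on `DefIndex` and `DepNodeIndex`
(the latter is already used in `lookup` above):

    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn lookup_roundtrip() {
            let cache: VecCache<DefIndex, u32> = VecCache::default();
            let key = DefIndex::from_u32(7);
            // Empty cache: nothing stored yet for this key.
            assert!(cache.lookup(&key).is_none());
            // Store a value plus its DepNodeIndex, then read it back.
            cache.complete(key, 42, DepNodeIndex::from_u32(3));
            assert_eq!(cache.lookup(&key), Some((42, DepNodeIndex::from_u32(3))));
            // `iter` visits exactly the completed entries.
            let mut seen = 0;
            cache.iter(&mut |k, v, idx| {
                assert_eq!((*k, *v, idx), (key, 42, DepNodeIndex::from_u32(3)));
                seen += 1;
            });
            assert_eq!(seen, 1);
        }
    }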