@@ -20,6 +20,8 @@ use rustc_data_structures::sharded::Sharded;
20
20
use rustc_data_structures:: thin_vec:: ThinVec ;
21
21
#[ cfg( not( parallel_compiler) ) ]
22
22
use rustc_data_structures:: cold_path;
23
+ #[ cfg( parallel_compiler) ]
24
+ use rustc_data_structures:: profiling:: TimingGuard ;
23
25
use std:: hash:: { Hash , Hasher } ;
24
26
use std:: mem;
25
27
use std:: ptr;
@@ -91,6 +93,19 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
91
93
/// for some compile-time benchmarks.
92
94
#[ inline( always) ]
93
95
pub ( super ) fn try_get ( tcx : TyCtxt < ' tcx > , span : Span , key : & Q :: Key ) -> TryGetJob < ' a , ' tcx , Q > {
96
+ // Handling the `query_blocked_prof_timer` is a bit weird because of the
97
+ // control flow in this function: Blocking is implemented by
98
+ // awaiting a running job and, once that is done, entering the loop below
99
+ // again from the top. In that second iteration we will hit the
100
+ // cache which provides us with the information we need for
101
+ // finishing the "query-blocked" event.
102
+ //
103
+ // We thus allocate `query_blocked_prof_timer` outside the loop,
104
+ // initialize it during the first iteration and finish it during the
105
+ // second iteration.
106
+ #[ cfg( parallel_compiler) ]
107
+ let mut query_blocked_prof_timer: Option < TimingGuard < ' _ > > = None ;
108
+
94
109
let cache = Q :: query_cache ( tcx) ;
95
110
loop {
96
111
// We compute the key's hash once and then use it for both the
@@ -104,7 +119,17 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
104
119
if let Some ( ( _, value) ) =
105
120
lock. results . raw_entry ( ) . from_key_hashed_nocheck ( key_hash, key)
106
121
{
107
- tcx. prof . query_cache_hit ( value. index . into ( ) ) ;
122
+ if unlikely ! ( tcx. prof. enabled( ) ) {
123
+ tcx. prof . query_cache_hit ( value. index . into ( ) ) ;
124
+
125
+ #[ cfg( parallel_compiler) ]
126
+ {
127
+ if let Some ( prof_timer) = query_blocked_prof_timer. take ( ) {
128
+ prof_timer. finish_with_query_invocation_id ( value. index . into ( ) ) ;
129
+ }
130
+ }
131
+ }
132
+
108
133
let result = ( value. value . clone ( ) , value. index ) ;
109
134
#[ cfg( debug_assertions) ]
110
135
{
@@ -113,9 +138,6 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
113
138
return TryGetJob :: JobCompleted ( result) ;
114
139
}
115
140
116
- #[ cfg( parallel_compiler) ]
117
- let query_blocked_prof_timer;
118
-
119
141
let job = match lock. active . entry ( ( * key) . clone ( ) ) {
120
142
Entry :: Occupied ( entry) => {
121
143
match * entry. get ( ) {
@@ -125,7 +147,7 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
125
147
// self-profiler.
126
148
#[ cfg( parallel_compiler) ]
127
149
{
128
- query_blocked_prof_timer = tcx. prof . query_blocked ( Q :: NAME ) ;
150
+ query_blocked_prof_timer = Some ( tcx. prof . query_blocked ( ) ) ;
129
151
}
130
152
131
153
job. clone ( )
@@ -169,11 +191,6 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
169
191
{
170
192
let result = job. r#await ( tcx, span) ;
171
193
172
- // This `drop()` is not strictly necessary as the binding
173
- // would go out of scope anyway. But it's good to have an
174
- // explicit marker of how far the measurement goes.
175
- drop ( query_blocked_prof_timer) ;
176
-
177
194
if let Err ( cycle) = result {
178
195
return TryGetJob :: Cycle ( Q :: handle_cycle_error ( tcx, cycle) ) ;
179
196
}
0 commit comments