attributes: help LLVM understand that some spans are never going to do anything (#1600) (#1605)

oli-obk · web-flow · commit 3b038ed3ad12 · 2021-10-01T09:51:02.000-07:00
## Motivation Adding `#[instrument(level = "debug")]` attributes to functions in rustc caused a performance regression (in release, where `debug!` is fully optimized out) across all crates: rust-lang/rust#89048 (comment) While trying to debug this, I noticed that spans don't have the same advantage that events have wrt to how LLVM sees them. Spans (or more precisely, the enter-guard), will get dropped at the end of the scope, which throws a spanner into the LLVM optimization pipeline. I am not entirely sure where the problem is, but I am moderately certain that the issue is that even entering a dummy span is too much code for LLVM to reliably (or at all) optimize out. ## Solution My hope is that in trusting the Rust compiler to generate cool code when using drop flags, we can essentially generate a drop flag that depends on something we know (due to events working as expected) to be optimizable. So instead of doing ```rust let _x = span!(); let _y = _x.enter(); // lotsa code drop(_y) ``` we do ```rust let _x; let _y; let must_drop = false; if level_enabled!(DEBUG) { must_drop = true; _x = span!(); _y = _x.enter(); } // lotsa code if must_drop { drop(_y) } ``` I believe this will allow LLVM to properly optimize this again. Testing that right now, but I wanted to open this PR immediately for review.
diff --git a/tracing-attributes/src/lib.rs b/tracing-attributes/src/lib.rs
@@ -601,6 +601,8 @@ fn gen_block(
         .map(|name| quote!(#name))
         .unwrap_or_else(|| quote!(#instrumented_function_name));
 
+    let level = args.level();
+
     // generate this inside a closure, so we can return early on errors.
     let span = (|| {
         // Pull out the arguments-to-be-skipped first, so we can filter results
@@ -646,7 +648,6 @@ fn gen_block(
             }
         }
 
-        let level = args.level();
         let target = args.target();
 
         // filter out skipped fields
@@ -713,7 +714,9 @@ fn gen_block(
         if err {
             quote_spanned!(block.span()=>
                 let __tracing_attr_span = #span;
-                tracing::Instrument::instrument(async move {
+                // See comment on the default case at the end of this function
+                // for why we do this a bit roundabout.
+                let fut = async move {
                     match async move { #block }.await {
                         #[allow(clippy::unit_arg)]
                         Ok(x) => Ok(x),
@@ -722,22 +725,46 @@ fn gen_block(
                             Err(e)
                         }
                     }
-                }, __tracing_attr_span).await
+                };
+                if tracing::level_enabled!(#level) {
+                    tracing::Instrument::instrument(
+                        fut,
+                        __tracing_attr_span
+                    )
+                    .await
+                } else {
+                    fut.await
+                }
             )
         } else {
             quote_spanned!(block.span()=>
                 let __tracing_attr_span = #span;
+                // See comment on the default case at the end of this function
+                // for why we do this a bit roundabout.
+                let fut = async move { #block };
+                if tracing::level_enabled!(#level) {
                     tracing::Instrument::instrument(
-                        async move { #block },
+                        fut,
                         __tracing_attr_span
                     )
                     .await
+                } else {
+                    fut.await
+                }
             )
         }
     } else if err {
         quote_spanned!(block.span()=>
-            let __tracing_attr_span = #span;
-            let __tracing_attr_guard = __tracing_attr_span.enter();
+            // See comment on the default case at the end of this function
+            // for why we do this a bit roundabout.
+            let __tracing_attr_span;
+            let __tracing_attr_guard;
+            if tracing::level_enabled!(#level) {
+                __tracing_attr_span = #span;
+                __tracing_attr_guard = __tracing_attr_span.enter();
+            }
+            // pacify clippy::suspicious_else_formatting
+            let _ = ();
             #[allow(clippy::redundant_closure_call)]
             match (move || #block)() {
                 #[allow(clippy::unit_arg)]
@@ -750,8 +777,24 @@ fn gen_block(
         )
     } else {
         quote_spanned!(block.span()=>
-            let __tracing_attr_span = #span;
-            let __tracing_attr_guard = __tracing_attr_span.enter();
+            // These variables are left uninitialized and initialized only
+            // if the tracing level is statically enabled at this point.
+            // While the tracing level is also checked at span creation
+            // time, that will still create a dummy span, and a dummy guard
+            // and drop the dummy guard later. By lazily initializing these
+            // variables, Rust will generate a drop flag for them and thus
+            // only drop the guard if it was created. This creates code that
+            // is very straightforward for LLVM to optimize out if the tracing
+            // level is statically disabled, while not causing any performance
+            // regression in case the level is enabled.
+            let __tracing_attr_span;
+            let __tracing_attr_guard;
+            if tracing::level_enabled!(#level) {
+                __tracing_attr_span = #span;
+                __tracing_attr_guard = __tracing_attr_span.enter();
+            }
+            // pacify clippy::suspicious_else_formatting
+            let _ = ();
             #block
         )
     }