Allow for stable-only benchmarks.

nnethercote · nnethercote · commit 58440075157c · 2022-03-15T15:08:37.000+11:00
Currently, every benchmark has a category defined in its
`perf-config.json`, either explicitly or implicitly (the category
defaults to "secondary" if not specified).

This commit allows for a benchmark to have no category, so long as it
has `supports_stable` set.

- The ability to implicitly specify the category has been removed.

- Stable-only benchmarks (those with no category) are not considered
  primary or secondary from the point of view of the user, though they
  are marked as primary in the DB.

- These benchmarks are indicated in `perf-config.json` by setting
  `supports_stable` to true and leaving `category` unspecified.

- These benchmarks are run by `bench_published`, but not by
  `bench_local`, `bench_next`, or `profile_local`.

It is still possible for a benchmark to set `supports_stable` *and*
have a category, and several benchmarks currently do both.
However, many of these benchmarks will soon have their category removed,
which means they will only be measured as part of the "stable" set.
diff --git a/collector/src/execute.rs b/collector/src/execute.rs
@@ -131,7 +131,11 @@ struct BenchmarkConfig {
     #[serde(default)]
     touch_file: Option<String>,
 
-    category: Category,
+    /// If this field is `None` when read from `perf-config.json`:
+    /// - If `supports_stable` is true, it will be replaced with
+    ///   `Some(Primary)` when the result is recorded in the DB.
+    /// - If `supports_stable` is false, an error will be returned.
+    category: Option<Category>,
 }
 
 #[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Hash)]
@@ -1227,6 +1231,9 @@ impl Benchmark {
         } else {
             bail!("missing a perf-config.json file for `{}`", name);
         };
+        if config.category.is_none() && !config.supports_stable {
+            bail!("bad perf-config.json for `{}`: category == None && !supports_stable", name);
+        }
 
         Ok(Benchmark {
             name: BenchmarkName(name),
@@ -1240,8 +1247,8 @@ impl Benchmark {
         self.config.supports_stable
     }
 
-    pub fn category(&self) -> &Category {
-        &self.config.category
+    pub fn category(&self) -> Option<Category> {
+        self.config.category
     }
 
     #[cfg(windows)]
diff --git a/collector/src/main.rs b/collector/src/main.rs
@@ -242,7 +242,11 @@ fn bench(
         measure_and_record(
             &benchmark.name,
             benchmark.supports_stable(),
-            benchmark.category().clone(),
+            benchmark.category().unwrap_or_else(|| {
+                // Stable-only benchmarks are marked in the DB as "primary".
+                assert!(benchmark.supports_stable());
+                Category::Primary
+            }),
             &|| {
                 eprintln!(
                     "{}",
@@ -924,11 +928,12 @@ fn main_result() -> anyhow::Result<i32> {
                 "",
             )?;
 
-            let benchmarks = get_benchmarks(
+            let mut benchmarks = get_benchmarks(
                 &benchmark_dir,
                 local.include.as_deref(),
                 local.exclude.as_deref(),
             )?;
+            benchmarks.retain(|b| b.category().is_some());
 
             let res = bench(
                 &mut rt,
@@ -978,11 +983,12 @@ fn main_result() -> anyhow::Result<i32> {
             let sysroot = Sysroot::install(commit.sha.to_string(), &target_triple)
                 .with_context(|| format!("failed to install sysroot for {:?}", commit))?;
 
-            let benchmarks = get_benchmarks(
+            let mut benchmarks = get_benchmarks(
                 &benchmark_dir,
                 next.include.as_deref(),
                 next.exclude.as_deref(),
             )?;
+            benchmarks.retain(|b| b.category().is_some());
 
             let res = bench(
                 &mut rt,
@@ -1077,11 +1083,13 @@ fn main_result() -> anyhow::Result<i32> {
             let profiles = Profile::expand_all(&local.profiles);
             let scenarios = Scenario::expand_all(&local.scenarios);
 
-            let benchmarks = get_benchmarks(
+            let mut benchmarks = get_benchmarks(
                 &benchmark_dir,
                 local.include.as_deref(),
                 local.exclude.as_deref(),
             )?;
+            benchmarks.retain(|b| b.category().is_some());
+
             let mut errors = BenchmarkErrors::new();
 
             let mut get_toolchain_and_profile =
diff --git a/database/src/lib.rs b/database/src/lib.rs
@@ -770,19 +770,13 @@ pub struct BenchmarkData {
     pub category: Category,
 }
 
-#[derive(Debug, Clone, Serialize, Deserialize)]
+#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
 #[serde(rename_all = "lowercase")]
 pub enum Category {
     Primary,
     Secondary,
 }
 
-impl Default for Category {
-    fn default() -> Self {
-        Self::Secondary
-    }
-}
-
 impl fmt::Display for Category {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match self {
diff --git a/docs/glossary.md b/docs/glossary.md
@@ -9,7 +9,7 @@ The following is a glossary of domain specific terminology. Although benchmarks
 * **scenario**: The scenario under which a user is compiling their code. Currently, this is the incremental cache state and an optional change in the source since last compilation (e.g., full incremental cache and a `println!` statement is added).  
 * **metric**: a name of a quantifiable metric being measured (e.g., instruction count)
 * **artifact**: a specific version of rustc (usually a commit sha or some sort of human readable "tag" like 1.51.0)
-* **category**: a high-level group of benchmarks. Currently, there are two categories, primary (mostly real-world crates) and secondary (mostly stress tests).
+* **category**: a high-level group of benchmarks. Currently, there are two categories, primary (mostly real-world crates) and secondary (mostly stress tests). There are also some benchmarks that are neither primary nor secondary, being only measured as part of the "stable" set measured for the dashboard.
 
 ## Benchmarks
 
@@ -44,4 +44,4 @@ The following is a glossary of domain specific terminology. Although benchmarks
 ## Other 
 
 * **bootstrap**: the process of building the compiler from a previous version of the compiler
-* **compiler query**: a query used inside the [compiler query system](https://rustc-dev-guide.rust-lang.org/overview.html#queries).
+* **compiler query**: a query used inside the [compiler query system](https://rustc-dev-guide.rust-lang.org/overview.html#queries).