Skip to content

Categorize perf runs when posting to GitHub #881

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 63 additions & 48 deletions site/src/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ pub async fn handle_triage(
let end = body.end;
// Compare against self to get next
let master_commits = collector::master_commits().await?;
let comparison = compare(
let comparison = compare_given_commits(
start.clone(),
start.clone(),
"instructions:u".to_owned(),
Expand All @@ -40,7 +40,7 @@ pub async fn handle_triage(
let mut before = start.clone();

loop {
let comparison = match compare(
let comparison = match compare_given_commits(
before,
after.clone(),
"instructions:u".to_owned(),
Expand Down Expand Up @@ -87,16 +87,17 @@ pub async fn handle_compare(
body: api::days::Request,
data: &InputData,
) -> Result<api::days::Response, BoxedError> {
let commits = collector::master_commits().await?;
let master_commits = collector::master_commits().await?;
let end = body.end;
let comparison = crate::comparison::compare(body.start, end.clone(), body.stat, data, &commits)
.await?
.ok_or_else(|| format!("could not find end commit for bound {:?}", end))?;
let comparison =
compare_given_commits(body.start, end.clone(), body.stat, data, &master_commits)
.await?
.ok_or_else(|| format!("could not find end commit for bound {:?}", end))?;

let conn = data.conn().await;
let prev = comparison.prev(&commits);
let next = comparison.next(&commits);
let is_contiguous = comparison.is_contiguous(&*conn, &commits).await;
let prev = comparison.prev(&master_commits);
let next = comparison.next(&master_commits);
let is_contiguous = comparison.is_contiguous(&*conn, &master_commits).await;

Ok(api::days::Response {
prev,
Expand All @@ -108,7 +109,7 @@ pub async fn handle_compare(
}

async fn populate_report(comparison: &Comparison, report: &mut HashMap<Direction, Vec<String>>) {
if let Some(summary) = summarize_comparison(comparison) {
if let Some(summary) = ComparisonSummary::summarize_comparison(comparison) {
if let Some(direction) = summary.direction() {
let entry = report.entry(direction).or_default();

Expand All @@ -117,44 +118,44 @@ async fn populate_report(comparison: &Comparison, report: &mut HashMap<Direction
}
}

fn summarize_comparison<'a>(comparison: &'a Comparison) -> Option<ComparisonSummary<'a>> {
let mut benchmarks = comparison.get_benchmarks();
// Skip empty commits, sometimes happens if there's a compiler bug or so.
if benchmarks.len() == 0 {
return None;
}

let cmp = |b1: &BenchmarkComparison, b2: &BenchmarkComparison| {
b1.log_change()
.partial_cmp(&b2.log_change())
.unwrap_or(std::cmp::Ordering::Equal)
};
let lo = benchmarks
.iter()
.enumerate()
.min_by(|&(_, b1), &(_, b2)| cmp(b1, b2))
.filter(|(_, c)| c.is_significant() && !c.is_increase())
.map(|(i, _)| i);
let lo = lo.map(|lo| benchmarks.remove(lo));
let hi = benchmarks
.iter()
.enumerate()
.max_by(|&(_, b1), &(_, b2)| cmp(b1, b2))
.filter(|(_, c)| c.is_significant() && c.is_increase())
.map(|(i, _)| i);
let hi = hi.map(|hi| benchmarks.remove(hi));

Some(ComparisonSummary { hi, lo })
}

struct ComparisonSummary<'a> {
pub struct ComparisonSummary<'a> {
hi: Option<BenchmarkComparison<'a>>,
lo: Option<BenchmarkComparison<'a>>,
}

impl ComparisonSummary<'_> {
pub fn summarize_comparison<'a>(comparison: &'a Comparison) -> Option<ComparisonSummary<'a>> {
let mut benchmarks = comparison.get_benchmarks();
// Skip empty commits, sometimes happens if there's a compiler bug or so.
if benchmarks.len() == 0 {
return None;
}

let cmp = |b1: &BenchmarkComparison, b2: &BenchmarkComparison| {
b1.log_change()
.partial_cmp(&b2.log_change())
.unwrap_or(std::cmp::Ordering::Equal)
};
let lo = benchmarks
.iter()
.enumerate()
.min_by(|&(_, b1), &(_, b2)| cmp(b1, b2))
.filter(|(_, c)| c.is_significant() && !c.is_increase())
.map(|(i, _)| i);
let lo = lo.map(|lo| benchmarks.remove(lo));
let hi = benchmarks
.iter()
.enumerate()
.max_by(|&(_, b1), &(_, b2)| cmp(b1, b2))
.filter(|(_, c)| c.is_significant() && c.is_increase())
.map(|(i, _)| i);
let hi = hi.map(|hi| benchmarks.remove(hi));

Some(ComparisonSummary { hi, lo })
}

/// The direction of the changes
fn direction(&self) -> Option<Direction> {
pub fn direction(&self) -> Option<Direction> {
let d = match (&self.hi, &self.lo) {
(None, None) => return None,
(Some(b), None) => b.direction(),
Expand All @@ -166,7 +167,7 @@ impl ComparisonSummary<'_> {
}

/// The changes ordered by their signficance (most significant first)
fn ordered_changes(&self) -> Vec<&BenchmarkComparison<'_>> {
pub fn ordered_changes(&self) -> Vec<&BenchmarkComparison<'_>> {
match (&self.hi, &self.lo) {
(None, None) => Vec::new(),
(Some(b), None) => vec![b],
Expand Down Expand Up @@ -202,7 +203,7 @@ impl ComparisonSummary<'_> {

for change in self.ordered_changes() {
write!(result, "- ").unwrap();
change.summary_line(&mut result, link)
change.summary_line(&mut result, Some(link))
}
result
}
Expand All @@ -216,6 +217,17 @@ pub async fn compare(
end: Bound,
stat: String,
data: &InputData,
) -> Result<Option<Comparison>, BoxedError> {
let master_commits = collector::master_commits().await?;
compare_given_commits(start, end, stat, data, &master_commits).await
}

/// Compare two bounds on a given stat
pub async fn compare_given_commits(
start: Bound,
end: Bound,
stat: String,
data: &InputData,
master_commits: &[collector::MasterCommit],
) -> Result<Option<Comparison>, BoxedError> {
let a = data
Expand Down Expand Up @@ -404,7 +416,7 @@ impl Comparison {

// A single comparison based on benchmark and cache state
#[derive(Debug)]
struct BenchmarkComparison<'a> {
pub struct BenchmarkComparison<'a> {
bench_name: &'a str,
cache_state: &'a str,
results: (f64, f64),
Expand Down Expand Up @@ -446,7 +458,7 @@ impl BenchmarkComparison<'_> {
}
}

fn summary_line(&self, summary: &mut String, link: &str) {
pub fn summary_line(&self, summary: &mut String, link: Option<&str>) {
use std::fmt::Write;
let magnitude = self.log_change().abs();
let size = if magnitude > 0.10 {
Expand All @@ -467,7 +479,10 @@ impl BenchmarkComparison<'_> {
"{} {} in [instruction counts]({})",
size,
self.direction(),
link
match link {
Some(l) => l,
None => "",
}
)
.unwrap();
writeln!(
Expand All @@ -481,7 +496,7 @@ impl BenchmarkComparison<'_> {

// The direction of a performance change
#[derive(PartialEq, Eq, Hash)]
enum Direction {
pub enum Direction {
Improvement,
Regression,
Mixed,
Expand Down
53 changes: 51 additions & 2 deletions site/src/github.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use serde::Deserialize;
use database::ArtifactId;
use regex::Regex;
use reqwest::header::USER_AGENT;
use std::{sync::Arc, time::Duration};
use std::{fmt::Write, sync::Arc, time::Duration};

lazy_static::lazy_static! {
static ref BODY_TRY_COMMIT: Regex =
Expand Down Expand Up @@ -631,12 +631,15 @@ pub async fn post_finished(data: &InputData) {
"https://perf.rust-lang.org/compare.html?start={}&end={}",
commit.parent_sha, commit.sha
);
let summary = categorize_benchmark(&commit, data).await;
post_comment(
&data.config,
commit.pr,
format!(
"Finished benchmarking try commit ({}): [comparison url]({}).

**Summary**: {}

Benchmarking this pull request likely means that it is \
perf-sensitive, so we're automatically marking it as not fit \
for rolling up. Please note that if the perf results are \
Comment on lines 643 to 645
Copy link

@LeSeulArtichaut LeSeulArtichaut Jun 23, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just an idea: if the perf results don't return any significant perf changes, maybe this rollup=never isn't necessary?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A perf run is likely done when a change to a perf sensitive part of the compiler is done. It may not result in any significant perf changes when the perf run is done, but due to interaction with other PR's it may still have a perf change when actually landing.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This categorization is also way too insensitive IMO. A perf run with improvements up to 0.6% and a couple of regressions up to 0.8% was marked as not having any significant changes. Many changes >0.2% across multiple benchmarks are unlikely to be noise. rust-lang/rust#86404 (comment)

image

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do think we can fine-tune sensitivity, but until we're even more confident I agree that we shouldn't drop the rollup=never. Our queue doesn't suffer much from it today anyway, to my knowledge.

Expand All @@ -649,10 +652,56 @@ regressions or improvements in the roll up.

@bors rollup=never
@rustbot label: +S-waiting-on-review -S-waiting-on-perf",
commit.sha, comparison_url
commit.sha, comparison_url, summary
),
)
.await;
}
}
}

async fn categorize_benchmark(commit: &database::QueuedCommit, data: &InputData) -> String {
let comparison = match crate::comparison::compare(
collector::Bound::Commit(commit.parent_sha.clone()),
collector::Bound::Commit(commit.sha.clone()),
"instructions:u".to_owned(),
data,
)
.await
{
Ok(Some(c)) => c,
_ => return String::from("ERROR categorizing benchmark run!"),
};
const DISAGREEMENT: &str = "If you disagree with this performance assessment, \
please file an issue in [rust-lang/rustc-perf](https://github.com/rust-lang/rustc-perf/issues/new).";
let (summary, direction) =
match crate::comparison::ComparisonSummary::summarize_comparison(&comparison) {
Some(s) if s.direction().is_some() => {
let direction = s.direction().unwrap();
(s, direction)
}
_ => {
return format!(
"This benchmark run did not return any significant changes.\n\n{}",
DISAGREEMENT
)
}
};

use crate::comparison::Direction;
let category = match direction {
Direction::Improvement => "improvements 🎉",
Direction::Regression => "regressions 😿",
Direction::Mixed => "mixed results 🤷",
};
let mut result = format!(
"This change led to significant {} in compiler performance.\n",
category
);
for change in summary.ordered_changes() {
write!(result, "- ").unwrap();
change.summary_line(&mut result, None)
}
write!(result, "\n{}", DISAGREEMENT).unwrap();
result
}