1
Fork 0

Rollup merge of #139016 - Kobzol:post-merge-analysis-durations, r=marcoieni

Add job duration changes to post-merge analysis report

This should help us observe large regressions in job duration changes.

I would also like to add quick links to GH jobs/workflow to the post-merge workflow, but for that I first need to store some CI metadata in the bootstrap metrics, to make it easier to look up the corresponding GH workflows (otherwise we'd need to look them up by commit SHA, which would be much more complicated). The last commit adds this metadata. Once this PR is merged and the metadata becomes available in the metrics stored on S3, I'll send a follow-up PR that uses the metadata to add links to job names in the post-merge workflow report.

r? `@marcoieni`
This commit is contained in:
Jacob Pratt 2025-03-27 13:11:21 -04:00 committed by GitHub
commit 1b8089d553
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 120 additions and 9 deletions

View file

@ -69,6 +69,8 @@ jobs:
env:
CI_JOB_NAME: ${{ matrix.name }}
CI_JOB_DOC_URL: ${{ matrix.doc_url }}
GITHUB_WORKFLOW_RUN_ID: ${{ github.run_id }}
GITHUB_REPOSITORY: ${{ github.repository }}
CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
# commit of PR sha or commit sha. `GITHUB_SHA` is not accurate for PRs.
HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}

View file

@ -9,9 +9,10 @@ use std::fs::File;
use std::io::BufWriter;
use std::time::{Duration, Instant, SystemTime};
use build_helper::ci::CiEnv;
use build_helper::metrics::{
JsonInvocation, JsonInvocationSystemStats, JsonNode, JsonRoot, JsonStepSystemStats, Test,
TestOutcome, TestSuite, TestSuiteMetadata,
CiMetadata, JsonInvocation, JsonInvocationSystemStats, JsonNode, JsonRoot, JsonStepSystemStats,
Test, TestOutcome, TestSuite, TestSuiteMetadata,
};
use sysinfo::{CpuRefreshKind, RefreshKind, System};
@ -217,7 +218,12 @@ impl BuildMetrics {
children: steps.into_iter().map(|step| self.prepare_json_step(step)).collect(),
});
let json = JsonRoot { format_version: CURRENT_FORMAT_VERSION, system_stats, invocations };
let json = JsonRoot {
format_version: CURRENT_FORMAT_VERSION,
system_stats,
invocations,
ci_metadata: get_ci_metadata(CiEnv::current()),
};
t!(std::fs::create_dir_all(dest.parent().unwrap()));
let mut file = BufWriter::new(t!(File::create(&dest)));
@ -245,6 +251,16 @@ impl BuildMetrics {
}
}
/// Gather CI metadata (workflow run ID and repository name) for the metrics
/// file. Returns `None` outside of GitHub Actions, or when either of the
/// `GITHUB_WORKFLOW_RUN_ID` / `GITHUB_REPOSITORY` environment variables is
/// missing or unparseable.
fn get_ci_metadata(ci_env: CiEnv) -> Option<CiMetadata> {
    // Only GitHub Actions exposes the environment variables read below.
    match ci_env {
        CiEnv::GitHubActions => {
            let raw_run_id = std::env::var("GITHUB_WORKFLOW_RUN_ID").ok()?;
            let workflow_run_id = raw_run_id.parse::<u64>().ok()?;
            let repository = std::env::var("GITHUB_REPOSITORY").ok()?;
            Some(CiMetadata { workflow_run_id, repository })
        }
        _ => None,
    }
}
struct MetricsState {
finished_steps: Vec<StepMetrics>,
running_steps: Vec<StepMetrics>,

View file

@ -9,6 +9,19 @@ pub struct JsonRoot {
pub format_version: usize,
pub system_stats: JsonInvocationSystemStats,
pub invocations: Vec<JsonInvocation>,
#[serde(default)]
pub ci_metadata: Option<CiMetadata>,
}
/// Represents metadata about bootstrap's execution in CI.
///
/// Serialized into the bootstrap metrics JSON so that post-merge tooling can
/// link metrics back to the GitHub Actions workflow that produced them.
#[derive(Serialize, Deserialize)]
pub struct CiMetadata {
    /// GitHub run ID of the workflow where bootstrap was executed.
    /// Note that the run ID will be shared amongst all jobs executed in that workflow.
    pub workflow_run_id: u64,
    /// Full name of a GitHub repository where bootstrap was executed in CI.
    /// e.g. `rust-lang-ci/rust`.
    pub repository: String,
}
#[derive(Serialize, Deserialize)]

View file

@ -1,5 +1,6 @@
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fmt::Debug;
use std::time::Duration;
use build_helper::metrics::{
BuildStep, JsonRoot, TestOutcome, TestSuite, TestSuiteMetadata, escape_step_name,
@ -184,11 +185,70 @@ fn render_table(suites: BTreeMap<String, TestSuiteRecord>) -> String {
}
/// Outputs a report of test differences between the `parent` and `current` commits.
pub fn output_test_diffs(job_metrics: HashMap<JobName, JobMetrics>) {
pub fn output_test_diffs(job_metrics: &HashMap<JobName, JobMetrics>) {
let aggregated_test_diffs = aggregate_test_diffs(&job_metrics);
report_test_diffs(aggregated_test_diffs);
}
/// Prints the ten largest differences in bootstrap durations between the
/// parent and current commits, as a Markdown report for the post-merge
/// workflow. Jobs without parent metrics are skipped.
pub fn output_largest_duration_changes(job_metrics: &HashMap<JobName, JobMetrics>) {
    /// One comparison row: total bootstrap duration before/after and the
    /// percentage change, normalized around 0 (+ = regression, - = improvement).
    struct Entry<'a> {
        job: &'a JobName,
        before: Duration,
        after: Duration,
        change: f64,
    }

    let mut changes: Vec<Entry> = vec![];
    for (job, metrics) in job_metrics {
        if let Some(parent) = &metrics.parent {
            // Total wall-clock duration is the sum over all bootstrap invocations.
            let duration_before = parent
                .invocations
                .iter()
                .map(|i| BuildStep::from_invocation(i).duration)
                .sum::<Duration>();
            let duration_after = metrics
                .current
                .invocations
                .iter()
                .map(|i| BuildStep::from_invocation(i).duration)
                .sum::<Duration>();
            // Guard against a zero parent duration (e.g. missing/empty parent
            // metrics): dividing by zero would yield NaN/inf. Treat a zero
            // baseline as a maximal regression instead of producing NaN, which
            // would otherwise break the sort below.
            let pct_change = if duration_before.is_zero() {
                f64::INFINITY
            } else {
                // Normalize around 100, to get + for regression and - for improvements
                (duration_after.as_secs_f64() / duration_before.as_secs_f64()) * 100.0 - 100.0
            };
            changes.push(Entry {
                job,
                before: duration_before,
                after: duration_after,
                change: pct_change,
            });
        }
    }
    // `total_cmp` is a total order over f64 (handles NaN/inf), unlike
    // `partial_cmp().unwrap()`, which panics on NaN. Sort descending by change.
    changes.sort_by(|e1, e2| e2.change.total_cmp(&e1.change));

    println!("# Job duration changes");
    for (index, entry) in changes.into_iter().take(10).enumerate() {
        println!(
            "{}. `{}`: {:.1}s -> {:.1}s ({:.1}%)",
            index + 1,
            entry.job,
            entry.before.as_secs_f64(),
            entry.after.as_secs_f64(),
            entry.change
        );
    }

    println!();
    output_details("How to interpret the job duration changes?", || {
        println!(
            r#"Job durations can vary a lot, based on the actual runner instance
that executed the job, system noise, invalidated caches, etc. The table above is provided
mostly for t-infra members, for simpler debugging of potential CI slow-downs."#
        );
    });
}
#[derive(Default)]
struct TestSuiteRecord {
passed: u64,

View file

@ -15,7 +15,7 @@ use clap::Parser;
use jobs::JobDatabase;
use serde_yaml::Value;
use crate::analysis::output_test_diffs;
use crate::analysis::{output_largest_duration_changes, output_test_diffs};
use crate::cpu_usage::load_cpu_usage;
use crate::datadog::upload_datadog_metric;
use crate::jobs::RunType;
@ -160,7 +160,7 @@ fn postprocess_metrics(
job_name,
JobMetrics { parent: Some(parent_metrics), current: metrics },
)]);
output_test_diffs(job_metrics);
output_test_diffs(&job_metrics);
return Ok(());
}
Err(error) => {
@ -180,7 +180,8 @@ fn post_merge_report(db: JobDatabase, current: String, parent: String) -> anyhow
let metrics = download_auto_job_metrics(&db, &parent, &current)?;
println!("\nComparing {parent} (parent) -> {current} (this PR)\n");
output_test_diffs(metrics);
output_test_diffs(&metrics);
output_largest_duration_changes(&metrics);
Ok(())
}

View file

@ -1,5 +1,5 @@
use std::collections::HashMap;
use std::path::Path;
use std::path::{Path, PathBuf};
use anyhow::Context;
use build_helper::metrics::{JsonNode, JsonRoot, TestSuite};
@ -74,6 +74,17 @@ Maybe it was newly added?"#,
}
pub fn download_job_metrics(job_name: &str, sha: &str) -> anyhow::Result<JsonRoot> {
// Best effort cache to speed-up local re-executions of citool
let cache_path = PathBuf::from(".citool-cache").join(sha).join(format!("{job_name}.json"));
if cache_path.is_file() {
if let Ok(metrics) = std::fs::read_to_string(&cache_path)
.map_err(|err| err.into())
.and_then(|data| anyhow::Ok::<JsonRoot>(serde_json::from_str::<JsonRoot>(&data)?))
{
return Ok(metrics);
}
}
let url = get_metrics_url(job_name, sha);
let mut response = ureq::get(&url).call()?;
if !response.status().is_success() {
@ -87,6 +98,13 @@ pub fn download_job_metrics(job_name: &str, sha: &str) -> anyhow::Result<JsonRoo
.body_mut()
.read_json()
.with_context(|| anyhow::anyhow!("cannot deserialize metrics from {url}"))?;
if let Ok(_) = std::fs::create_dir_all(cache_path.parent().unwrap()) {
if let Ok(data) = serde_json::to_string(&data) {
let _ = std::fs::write(cache_path, data);
}
}
Ok(data)
}

View file

@ -23,7 +23,6 @@ where
println!(
r"<details>
<summary>{summary}</summary>
"
);
func();

View file

@ -355,6 +355,8 @@ docker \
--env GITHUB_ACTIONS \
--env GITHUB_REF \
--env GITHUB_STEP_SUMMARY="/checkout/obj/${SUMMARY_FILE}" \
--env GITHUB_WORKFLOW_RUN_ID \
--env GITHUB_REPOSITORY \
--env RUST_BACKTRACE \
--env TOOLSTATE_REPO_ACCESS_TOKEN \
--env TOOLSTATE_REPO \