Rollup merge of #139016 - Kobzol:post-merge-analysis-durations, r=marcoieni
Add job duration changes to post-merge analysis report

This should help us spot large regressions in job durations. I would also like to add quick links to GH jobs/workflows to the post-merge report, but for that I first need to store some CI metadata in the bootstrap metrics, to make it easier to look up the corresponding GH workflows (otherwise we'd have to look them up by commit SHA, which would be much more complicated). The last commit adds this metadata.

Once this PR is merged and the metadata is available in the metrics stored on S3, I'll send a follow-up PR that uses the metadata to add links to job names in the post-merge workflow report.

r? `@marcoieni`
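For the planned follow-up, the stored metadata is enough to build a link to the workflow run. A minimal sketch, assuming GitHub's usual `/{owner}/{repo}/actions/runs/{run_id}` URL scheme; `workflow_run_url` is a hypothetical helper and the values are invented:

```rust
/// Hypothetical helper for the follow-up PR: build a link to the workflow
/// run that produced a metrics file, from the stored CiMetadata fields.
fn workflow_run_url(repository: &str, workflow_run_id: u64) -> String {
    format!("https://github.com/{repository}/actions/runs/{workflow_run_id}")
}

// workflow_run_url("rust-lang-ci/rust", 14123456789)
// => "https://github.com/rust-lang-ci/rust/actions/runs/14123456789"
```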
Commit 1b8089d553
8 changed files with 120 additions and 9 deletions

.github/workflows/ci.yml (vendored, 2 changes)

@@ -69,6 +69,8 @@ jobs:
     env:
       CI_JOB_NAME: ${{ matrix.name }}
       CI_JOB_DOC_URL: ${{ matrix.doc_url }}
+      GITHUB_WORKFLOW_RUN_ID: ${{ github.run_id }}
+      GITHUB_REPOSITORY: ${{ github.repository }}
       CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
       # commit of PR sha or commit sha. `GITHUB_SHA` is not accurate for PRs.
       HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
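The two new variables surface the Actions `run_id` and `repository` contexts as ordinary environment variables, since bootstrap can only read the process environment, not the Actions expression context; `get_ci_metadata` below picks them up via `std::env::var`.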
src/bootstrap/src/utils/metrics.rs

@@ -9,9 +9,10 @@ use std::fs::File;
 use std::io::BufWriter;
 use std::time::{Duration, Instant, SystemTime};
 
+use build_helper::ci::CiEnv;
 use build_helper::metrics::{
-    JsonInvocation, JsonInvocationSystemStats, JsonNode, JsonRoot, JsonStepSystemStats, Test,
-    TestOutcome, TestSuite, TestSuiteMetadata,
+    CiMetadata, JsonInvocation, JsonInvocationSystemStats, JsonNode, JsonRoot, JsonStepSystemStats,
+    Test, TestOutcome, TestSuite, TestSuiteMetadata,
 };
 use sysinfo::{CpuRefreshKind, RefreshKind, System};
 
@@ -217,7 +218,12 @@ impl BuildMetrics {
             children: steps.into_iter().map(|step| self.prepare_json_step(step)).collect(),
         });
 
-        let json = JsonRoot { format_version: CURRENT_FORMAT_VERSION, system_stats, invocations };
+        let json = JsonRoot {
+            format_version: CURRENT_FORMAT_VERSION,
+            system_stats,
+            invocations,
+            ci_metadata: get_ci_metadata(CiEnv::current()),
+        };
 
         t!(std::fs::create_dir_all(dest.parent().unwrap()));
         let mut file = BufWriter::new(t!(File::create(&dest)));
@@ -245,6 +251,16 @@ impl BuildMetrics {
     }
 }
 
+fn get_ci_metadata(ci_env: CiEnv) -> Option<CiMetadata> {
+    if ci_env != CiEnv::GitHubActions {
+        return None;
+    }
+    let workflow_run_id =
+        std::env::var("GITHUB_WORKFLOW_RUN_ID").ok().and_then(|id| id.parse::<u64>().ok())?;
+    let repository = std::env::var("GITHUB_REPOSITORY").ok()?;
+    Some(CiMetadata { workflow_run_id, repository })
+}
+
 struct MetricsState {
     finished_steps: Vec<StepMetrics>,
     running_steps: Vec<StepMetrics>,
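To illustrate the new helper's contract, a hypothetical test sketch, assuming it sits in the same module as the private `get_ci_metadata` (env values invented; `set_var` mutates process-global state, so this is illustration only):

```rust
#[test]
fn ci_metadata_from_github_actions_env() {
    std::env::set_var("GITHUB_WORKFLOW_RUN_ID", "14123456789");
    std::env::set_var("GITHUB_REPOSITORY", "rust-lang-ci/rust");

    // Outside of GitHub Actions, no metadata is recorded at all.
    assert!(get_ci_metadata(CiEnv::None).is_none());

    // In CI, both env vars must be present and the run ID must parse as u64,
    // otherwise the `?` operators make the whole function return None.
    let meta = get_ci_metadata(CiEnv::GitHubActions).unwrap();
    assert_eq!(meta.workflow_run_id, 14123456789);
    assert_eq!(meta.repository, "rust-lang-ci/rust");
}
```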
src/build_helper/src/metrics.rs

@@ -9,6 +9,19 @@ pub struct JsonRoot {
     pub format_version: usize,
     pub system_stats: JsonInvocationSystemStats,
     pub invocations: Vec<JsonInvocation>,
+    #[serde(default)]
+    pub ci_metadata: Option<CiMetadata>,
+}
+
+/// Represents metadata about bootstrap's execution in CI.
+#[derive(Serialize, Deserialize)]
+pub struct CiMetadata {
+    /// GitHub run ID of the workflow where bootstrap was executed.
+    /// Note that the run ID will be shared amongst all jobs executed in that workflow.
+    pub workflow_run_id: u64,
+    /// Full name of a GitHub repository where bootstrap was executed in CI.
+    /// e.g. `rust-lang-ci/rust`.
+    pub repository: String,
 }
 
 #[derive(Serialize, Deserialize)]
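The `#[serde(default)]` attribute keeps old metrics files, which lack the field, deserializable: `ci_metadata` simply becomes `None`. A sketch of the serialized shape when the field is present (values invented):

```rust
let meta = CiMetadata {
    workflow_run_id: 14123456789,
    repository: "rust-lang-ci/rust".to_string(),
};
// serde_json::to_string(&meta) yields:
// {"workflow_run_id":14123456789,"repository":"rust-lang-ci/rust"}
```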
src/ci/citool/src/analysis.rs

@@ -1,5 +1,6 @@
 use std::collections::{BTreeMap, HashMap, HashSet};
 use std::fmt::Debug;
+use std::time::Duration;
 
 use build_helper::metrics::{
     BuildStep, JsonRoot, TestOutcome, TestSuite, TestSuiteMetadata, escape_step_name,
@@ -184,11 +185,70 @@ fn render_table(suites: BTreeMap<String, TestSuiteRecord>) -> String {
 }
 
 /// Outputs a report of test differences between the `parent` and `current` commits.
-pub fn output_test_diffs(job_metrics: HashMap<JobName, JobMetrics>) {
+pub fn output_test_diffs(job_metrics: &HashMap<JobName, JobMetrics>) {
     let aggregated_test_diffs = aggregate_test_diffs(&job_metrics);
     report_test_diffs(aggregated_test_diffs);
 }
 
+/// Prints the ten largest differences in bootstrap durations.
+pub fn output_largest_duration_changes(job_metrics: &HashMap<JobName, JobMetrics>) {
+    struct Entry<'a> {
+        job: &'a JobName,
+        before: Duration,
+        after: Duration,
+        change: f64,
+    }
+
+    let mut changes: Vec<Entry> = vec![];
+    for (job, metrics) in job_metrics {
+        if let Some(parent) = &metrics.parent {
+            let duration_before = parent
+                .invocations
+                .iter()
+                .map(|i| BuildStep::from_invocation(i).duration)
+                .sum::<Duration>();
+            let duration_after = metrics
+                .current
+                .invocations
+                .iter()
+                .map(|i| BuildStep::from_invocation(i).duration)
+                .sum::<Duration>();
+            let pct_change = duration_after.as_secs_f64() / duration_before.as_secs_f64();
+            let pct_change = pct_change * 100.0;
+            // Normalize around 100, to get + for regression and - for improvements
+            let pct_change = pct_change - 100.0;
+            changes.push(Entry {
+                job,
+                before: duration_before,
+                after: duration_after,
+                change: pct_change,
+            });
+        }
+    }
+    changes.sort_by(|e1, e2| e1.change.partial_cmp(&e2.change).unwrap().reverse());
+
+    println!("# Job duration changes");
+    for (index, entry) in changes.into_iter().take(10).enumerate() {
+        println!(
+            "{}. `{}`: {:.1}s -> {:.1}s ({:.1}%)",
+            index + 1,
+            entry.job,
+            entry.before.as_secs_f64(),
+            entry.after.as_secs_f64(),
+            entry.change
+        );
+    }
+
+    println!();
+    output_details("How to interpret the job duration changes?", || {
+        println!(
+            r#"Job durations can vary a lot, based on the actual runner instance
+that executed the job, system noise, invalidated caches, etc. The table above is provided
+mostly for t-infra members, for simpler debugging of potential CI slow-downs."#
+        );
+    });
+}
+
 #[derive(Default)]
 struct TestSuiteRecord {
     passed: u64,
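The percentage is `after / before * 100 - 100`, so a job going from 3600s to 3960s reports as +10.0% and improvements come out negative; the descending sort puts the worst regressions first. Rendered through the `println!` formats above, the report would look roughly like this (job names and durations invented):

```text
# Job duration changes
1. `x86_64-gnu-llvm-18`: 3600.0s -> 3960.0s (10.0%)
2. `dist-x86_64-linux`: 7200.0s -> 7100.0s (-1.4%)
```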
src/ci/citool/src/main.rs

@@ -15,7 +15,7 @@ use clap::Parser;
 use jobs::JobDatabase;
 use serde_yaml::Value;
 
-use crate::analysis::output_test_diffs;
+use crate::analysis::{output_largest_duration_changes, output_test_diffs};
 use crate::cpu_usage::load_cpu_usage;
 use crate::datadog::upload_datadog_metric;
 use crate::jobs::RunType;
@@ -160,7 +160,7 @@ fn postprocess_metrics(
             job_name,
             JobMetrics { parent: Some(parent_metrics), current: metrics },
         )]);
-        output_test_diffs(job_metrics);
+        output_test_diffs(&job_metrics);
         return Ok(());
     }
     Err(error) => {
@@ -180,7 +180,8 @@ fn post_merge_report(db: JobDatabase, current: String, parent: String) -> anyhow::Result<()> {
     let metrics = download_auto_job_metrics(&db, &parent, &current)?;
 
     println!("\nComparing {parent} (parent) -> {current} (this PR)\n");
-    output_test_diffs(metrics);
+    output_test_diffs(&metrics);
+    output_largest_duration_changes(&metrics);
 
     Ok(())
 }
src/ci/citool/src/metrics.rs

@@ -1,5 +1,5 @@
 use std::collections::HashMap;
-use std::path::Path;
+use std::path::{Path, PathBuf};
 
 use anyhow::Context;
 use build_helper::metrics::{JsonNode, JsonRoot, TestSuite};
@@ -74,6 +74,17 @@ Maybe it was newly added?"#,
 }
 
 pub fn download_job_metrics(job_name: &str, sha: &str) -> anyhow::Result<JsonRoot> {
+    // Best effort cache to speed-up local re-executions of citool
+    let cache_path = PathBuf::from(".citool-cache").join(sha).join(format!("{job_name}.json"));
+    if cache_path.is_file() {
+        if let Ok(metrics) = std::fs::read_to_string(&cache_path)
+            .map_err(|err| err.into())
+            .and_then(|data| anyhow::Ok::<JsonRoot>(serde_json::from_str::<JsonRoot>(&data)?))
+        {
+            return Ok(metrics);
+        }
+    }
+
     let url = get_metrics_url(job_name, sha);
     let mut response = ureq::get(&url).call()?;
     if !response.status().is_success() {
@@ -87,6 +98,13 @@ pub fn download_job_metrics(job_name: &str, sha: &str) -> anyhow::Result<JsonRoot> {
         .body_mut()
         .read_json()
         .with_context(|| anyhow::anyhow!("cannot deserialize metrics from {url}"))?;
+
+    if let Ok(_) = std::fs::create_dir_all(cache_path.parent().unwrap()) {
+        if let Ok(data) = serde_json::to_string(&data) {
+            let _ = std::fs::write(cache_path, data);
+        }
+    }
 
     Ok(data)
 }
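The cache is keyed by commit SHA and job name, so re-running `citool` locally against the same commit skips the downloads; on disk it looks like this (SHA and job names invented):

```text
.citool-cache/
└── <commit-sha>/
    ├── x86_64-gnu-llvm-18.json
    └── dist-x86_64-linux.json
```

Both paths are deliberately best-effort: a missing or corrupt cache entry falls through to a fresh download, and write failures are ignored (`let _ =`).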
src/ci/citool/src/utils.rs

@@ -23,7 +23,6 @@ where
     println!(
         r"<details>
 <summary>{summary}</summary>
-
 "
     );
     func();
src/ci/docker/run.sh

@@ -355,6 +355,8 @@ docker \
       --env GITHUB_ACTIONS \
       --env GITHUB_REF \
       --env GITHUB_STEP_SUMMARY="/checkout/obj/${SUMMARY_FILE}" \
+      --env GITHUB_WORKFLOW_RUN_ID \
+      --env GITHUB_REPOSITORY \
       --env RUST_BACKTRACE \
       --env TOOLSTATE_REPO_ACCESS_TOKEN \
       --env TOOLSTATE_REPO \
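Note that `--env NAME` without a value tells Docker to forward the variable from the host environment rather than set a fixed one, so the containerized build sees exactly the `GITHUB_WORKFLOW_RUN_ID` and `GITHUB_REPOSITORY` that the Actions runner exported via the ci.yml change above.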