Rollup merge of #138531 - Kobzol:test-diff-try-build, r=marcoieni

Store test diffs in job summaries and improve analysis formatting This PR stores the test diffs that we already have in the post-merge workflow also into individual job summaries. This makes it easier to compare test (and later also other) diffs per job, which will be especially useful for try jobs, so that we can actually see the test diffs *before* we merge a given PR. As a drive-by, I also made a bunch of cleanups in `citool` and in the formatting of the summary and post-merge analyses. These changes are split into self-contained commits. The analysis can be tested locally with the following command: ```bash $ curl https://ci-artifacts.rust-lang.org/rustc-builds/<current-sha>/metrics-<job-name>.json > metrics.json $ cargo run --manifest-path src/ci/citool/Cargo.toml postprocess-metrics metrics.json --job-name <job-name> --parent <parent-sha> > out.md ``` For example, for [this PR](https://github.com/rust-lang/rust/pull/138523): ```bash $ curl https://ci-artifacts.rust-lang.org/rustc-builds/282865097d138c7f0f7a7566db5b761312dd145c/metrics-aarch64-gnu.json > metrics.json $ cargo run --manifest-path src/ci/citool/Cargo.toml postprocess-metrics metrics.json --job-name aarch64-gnu --parent d9e5539a39 > out.md ``` Best reviewed commit by commit. r? `@marcoieni` try-job: aarch64-gnu try-job: dist-x86_64-linux
2025-03-17 22:49:05 +01:00 · 2025-03-17 22:49:05 +01:00 · 5786233b81
commit 5786233b81
parent 01062ba233 c9d314773e
6 changed files with 532 additions and 472 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -239,16 +239,31 @@ jobs:
        if: github.event_name == 'push' || env.DEPLOY == '1' || env.DEPLOY_ALT == '1'

      - name: postprocess metrics into the summary
+        # This step is not critical, and if some I/O problem happens, we don't want
+        # to cancel the build.
+        continue-on-error: true
        run: |
          if [ -f build/metrics.json ]; then
-            ./build/citool/debug/citool postprocess-metrics build/metrics.json ${GITHUB_STEP_SUMMARY}
+            METRICS=build/metrics.json
          elif [ -f obj/build/metrics.json ]; then
-            ./build/citool/debug/citool postprocess-metrics obj/build/metrics.json ${GITHUB_STEP_SUMMARY}
+            METRICS=obj/build/metrics.json
          else
            echo "No metrics.json found"
+            exit 0
          fi

+          # Get closest bors merge commit
+          PARENT_COMMIT=`git rev-list --author='bors <bors@rust-lang.org>' -n1 --first-parent HEAD^1`
+
+          ./build/citool/debug/citool postprocess-metrics \
+              --job-name ${CI_JOB_NAME} \
+              --parent ${PARENT_COMMIT} \
+              ${METRICS} >> ${GITHUB_STEP_SUMMARY}
+
      - name: upload job metrics to DataDog
+        # This step is not critical, and if some I/O problem happens, we don't want
+        # to cancel the build.
+        continue-on-error: true
        if: needs.calculate_matrix.outputs.run_type != 'pr'
        env:
          DATADOG_API_KEY: ${{ secrets.DATADOG_API_KEY }}
--- a/src/ci/citool/src/analysis.rs
+++ b/src/ci/citool/src/analysis.rs
@ -0,0 +1,362 @@
+use std::collections::{BTreeMap, HashMap, HashSet};
+
+use build_helper::metrics::{
+    BuildStep, JsonRoot, TestOutcome, TestSuite, TestSuiteMetadata, format_build_steps,
+};
+
+use crate::metrics;
+use crate::metrics::{JobMetrics, JobName, get_test_suites};
+use crate::utils::{output_details, pluralize};
+
+pub fn output_bootstrap_stats(metrics: &JsonRoot) {
+    if !metrics.invocations.is_empty() {
+        println!("# Bootstrap steps");
+        record_bootstrap_step_durations(&metrics);
+        record_test_suites(&metrics);
+    }
+}
+
+fn record_bootstrap_step_durations(metrics: &JsonRoot) {
+    for invocation in &metrics.invocations {
+        let step = BuildStep::from_invocation(invocation);
+        let table = format_build_steps(&step);
+        eprintln!("Step `{}`\n{table}\n", invocation.cmdline);
+        output_details(&invocation.cmdline, || {
+            println!("<pre><code>{table}</code></pre>");
+        });
+    }
+    eprintln!("Recorded {} bootstrap invocation(s)", metrics.invocations.len());
+}
+
+fn record_test_suites(metrics: &JsonRoot) {
+    let suites = metrics::get_test_suites(&metrics);
+
+    if !suites.is_empty() {
+        let aggregated = aggregate_test_suites(&suites);
+        let table = render_table(aggregated);
+        println!("\n# Test results\n");
+        println!("{table}");
+    } else {
+        eprintln!("No test suites found in metrics");
+    }
+}
+
+fn render_table(suites: BTreeMap<String, TestSuiteRecord>) -> String {
+    use std::fmt::Write;
+
+    let mut table = "| Test suite | Passed ✅ | Ignored 🚫 | Failed  ❌ |\n".to_string();
+    writeln!(table, "|:------|------:|------:|------:|").unwrap();
+
+    fn compute_pct(value: f64, total: f64) -> f64 {
+        if total == 0.0 { 0.0 } else { value / total }
+    }
+
+    fn write_row(
+        buffer: &mut String,
+        name: &str,
+        record: &TestSuiteRecord,
+        surround: &str,
+    ) -> std::fmt::Result {
+        let TestSuiteRecord { passed, ignored, failed } = record;
+        let total = (record.passed + record.ignored + record.failed) as f64;
+        let passed_pct = compute_pct(*passed as f64, total) * 100.0;
+        let ignored_pct = compute_pct(*ignored as f64, total) * 100.0;
+        let failed_pct = compute_pct(*failed as f64, total) * 100.0;
+
+        write!(buffer, "| {surround}{name}{surround} |")?;
+        write!(buffer, " {surround}{passed} ({passed_pct:.0}%){surround} |")?;
+        write!(buffer, " {surround}{ignored} ({ignored_pct:.0}%){surround} |")?;
+        writeln!(buffer, " {surround}{failed} ({failed_pct:.0}%){surround} |")?;
+
+        Ok(())
+    }
+
+    let mut total = TestSuiteRecord::default();
+    for (name, record) in suites {
+        write_row(&mut table, &name, &record, "").unwrap();
+        total.passed += record.passed;
+        total.ignored += record.ignored;
+        total.failed += record.failed;
+    }
+    write_row(&mut table, "Total", &total, "**").unwrap();
+    table
+}
+
+/// Computes a post merge CI analysis report of test differences
+/// between the `parent` and `current` commits.
+pub fn output_test_diffs(job_metrics: HashMap<JobName, JobMetrics>) {
+    let aggregated_test_diffs = aggregate_test_diffs(&job_metrics);
+    report_test_diffs(aggregated_test_diffs);
+}
+
+#[derive(Default)]
+struct TestSuiteRecord {
+    passed: u64,
+    ignored: u64,
+    failed: u64,
+}
+
+fn test_metadata_name(metadata: &TestSuiteMetadata) -> String {
+    match metadata {
+        TestSuiteMetadata::CargoPackage { crates, stage, .. } => {
+            format!("{} (stage {stage})", crates.join(", "))
+        }
+        TestSuiteMetadata::Compiletest { suite, stage, .. } => {
+            format!("{suite} (stage {stage})")
+        }
+    }
+}
+
+fn aggregate_test_suites(suites: &[&TestSuite]) -> BTreeMap<String, TestSuiteRecord> {
+    let mut records: BTreeMap<String, TestSuiteRecord> = BTreeMap::new();
+    for suite in suites {
+        let name = test_metadata_name(&suite.metadata);
+        let record = records.entry(name).or_default();
+        for test in &suite.tests {
+            match test.outcome {
+                TestOutcome::Passed => {
+                    record.passed += 1;
+                }
+                TestOutcome::Failed => {
+                    record.failed += 1;
+                }
+                TestOutcome::Ignored { .. } => {
+                    record.ignored += 1;
+                }
+            }
+        }
+    }
+    records
+}
+
+/// Represents a difference in the outcome of tests between a base and a current commit.
+/// Maps test diffs to jobs that contained them.
+#[derive(Debug)]
+struct AggregatedTestDiffs {
+    diffs: HashMap<TestDiff, Vec<JobName>>,
+}
+
+fn aggregate_test_diffs(jobs: &HashMap<JobName, JobMetrics>) -> AggregatedTestDiffs {
+    let mut diffs: HashMap<TestDiff, Vec<JobName>> = HashMap::new();
+
+    // Aggregate test suites
+    for (name, metrics) in jobs {
+        if let Some(parent) = &metrics.parent {
+            let tests_parent = aggregate_tests(parent);
+            let tests_current = aggregate_tests(&metrics.current);
+            for diff in calculate_test_diffs(tests_parent, tests_current) {
+                diffs.entry(diff).or_default().push(name.to_string());
+            }
+        }
+    }
+
+    AggregatedTestDiffs { diffs }
+}
+
+#[derive(Eq, PartialEq, Hash, Debug)]
+enum TestOutcomeDiff {
+    ChangeOutcome { before: TestOutcome, after: TestOutcome },
+    Missing { before: TestOutcome },
+    Added(TestOutcome),
+}
+
+#[derive(Eq, PartialEq, Hash, Debug)]
+struct TestDiff {
+    test: Test,
+    diff: TestOutcomeDiff,
+}
+
+fn calculate_test_diffs(parent: TestSuiteData, current: TestSuiteData) -> HashSet<TestDiff> {
+    let mut diffs = HashSet::new();
+    for (test, outcome) in &current.tests {
+        match parent.tests.get(test) {
+            Some(before) => {
+                if before != outcome {
+                    diffs.insert(TestDiff {
+                        test: test.clone(),
+                        diff: TestOutcomeDiff::ChangeOutcome {
+                            before: before.clone(),
+                            after: outcome.clone(),
+                        },
+                    });
+                }
+            }
+            None => {
+                diffs.insert(TestDiff {
+                    test: test.clone(),
+                    diff: TestOutcomeDiff::Added(outcome.clone()),
+                });
+            }
+        }
+    }
+    for (test, outcome) in &parent.tests {
+        if !current.tests.contains_key(test) {
+            diffs.insert(TestDiff {
+                test: test.clone(),
+                diff: TestOutcomeDiff::Missing { before: outcome.clone() },
+            });
+        }
+    }
+
+    diffs
+}
+
+/// Aggregates test suite executions from all bootstrap invocations in a given CI job.
+#[derive(Default)]
+struct TestSuiteData {
+    tests: HashMap<Test, TestOutcome>,
+}
+
+#[derive(Hash, PartialEq, Eq, Debug, Clone)]
+struct Test {
+    name: String,
+    is_doctest: bool,
+}
+
+/// Extracts all tests from the passed metrics and map them to their outcomes.
+fn aggregate_tests(metrics: &JsonRoot) -> TestSuiteData {
+    let mut tests = HashMap::new();
+    let test_suites = get_test_suites(&metrics);
+    for suite in test_suites {
+        for test in &suite.tests {
+            // Poor man's detection of doctests based on the "(line XYZ)" suffix
+            let is_doctest = matches!(suite.metadata, TestSuiteMetadata::CargoPackage { .. })
+                && test.name.contains("(line");
+            let test_entry = Test { name: generate_test_name(&test.name, &suite), is_doctest };
+            tests.insert(test_entry, test.outcome.clone());
+        }
+    }
+    TestSuiteData { tests }
+}
+
+/// Normalizes Windows-style path delimiters to Unix-style paths
+/// and adds suite metadata to the test name.
+fn generate_test_name(name: &str, suite: &TestSuite) -> String {
+    let name = name.replace('\\', "/");
+    let stage = match suite.metadata {
+        TestSuiteMetadata::CargoPackage { stage, .. } => stage,
+        TestSuiteMetadata::Compiletest { stage, .. } => stage,
+    };
+
+    format!("{name} (stage {stage})")
+}
+
+/// Prints test changes in Markdown format to stdout.
+fn report_test_diffs(diff: AggregatedTestDiffs) {
+    println!("# Test differences");
+    if diff.diffs.is_empty() {
+        println!("No test diffs found");
+        return;
+    }
+
+    fn format_outcome(outcome: &TestOutcome) -> String {
+        match outcome {
+            TestOutcome::Passed => "pass".to_string(),
+            TestOutcome::Failed => "fail".to_string(),
+            TestOutcome::Ignored { ignore_reason } => {
+                let reason = match ignore_reason {
+                    Some(reason) => format!(" ({reason})"),
+                    None => String::new(),
+                };
+                format!("ignore{reason}")
+            }
+        }
+    }
+
+    fn format_diff(diff: &TestOutcomeDiff) -> String {
+        match diff {
+            TestOutcomeDiff::ChangeOutcome { before, after } => {
+                format!("{} -> {}", format_outcome(before), format_outcome(after))
+            }
+            TestOutcomeDiff::Missing { before } => {
+                format!("{} -> [missing]", format_outcome(before))
+            }
+            TestOutcomeDiff::Added(outcome) => {
+                format!("[missing] -> {}", format_outcome(outcome))
+            }
+        }
+    }
+
+    fn format_job_group(group: u64) -> String {
+        format!("**J{group}**")
+    }
+
+    // It would be quite noisy to repeat the jobs that contained the test changes after/next to
+    // every test diff. At the same time, grouping the test diffs by
+    // [unique set of jobs that contained them] also doesn't work well, because the test diffs
+    // would have to be duplicated several times.
+    // Instead, we create a set of unique job groups, and then print a job group after each test.
+    // We then print the job groups at the end, as a sort of index.
+    let mut grouped_diffs: Vec<(&TestDiff, u64)> = vec![];
+    let mut job_list_to_group: HashMap<&[JobName], u64> = HashMap::new();
+    let mut job_index: Vec<&[JobName]> = vec![];
+
+    let original_diff_count = diff.diffs.len();
+    let diffs = diff
+        .diffs
+        .into_iter()
+        .filter(|(diff, _)| !diff.test.is_doctest)
+        .map(|(diff, mut jobs)| {
+            jobs.sort();
+            (diff, jobs)
+        })
+        .collect::<Vec<_>>();
+    let doctest_count = original_diff_count.saturating_sub(diffs.len());
+
+    let max_diff_count = 100;
+    for (diff, jobs) in diffs.iter().take(max_diff_count) {
+        let jobs = &*jobs;
+        let job_group = match job_list_to_group.get(jobs.as_slice()) {
+            Some(id) => *id,
+            None => {
+                let id = job_index.len() as u64;
+                job_index.push(jobs);
+                job_list_to_group.insert(jobs, id);
+                id
+            }
+        };
+        grouped_diffs.push((diff, job_group));
+    }
+
+    // Sort diffs by job group and test name
+    grouped_diffs.sort_by(|(d1, g1), (d2, g2)| g1.cmp(&g2).then(d1.test.name.cmp(&d2.test.name)));
+
+    output_details(
+        &format!("Show {} test {}\n", original_diff_count, pluralize("diff", original_diff_count)),
+        || {
+            for (diff, job_group) in grouped_diffs {
+                println!(
+                    "- `{}`: {} ({})",
+                    diff.test.name,
+                    format_diff(&diff.diff),
+                    format_job_group(job_group)
+                );
+            }
+
+            let extra_diffs = diffs.len().saturating_sub(max_diff_count);
+            if extra_diffs > 0 {
+                println!(
+                    "\n(and {extra_diffs} additional {})",
+                    pluralize("test diff", extra_diffs)
+                );
+            }
+
+            if doctest_count > 0 {
+                println!(
+                    "\nAdditionally, {doctest_count} doctest {} were found. These are ignored, as they are noisy.",
+                    pluralize("diff", doctest_count)
+                );
+            }
+
+            // Now print the job group index
+            println!("\n**Job group index**\n");
+            for (group, jobs) in job_index.into_iter().enumerate() {
+                println!(
+                    "- {}: {}",
+                    format_job_group(group as u64),
+                    jobs.iter().map(|j| format!("`{j}`")).collect::<Vec<_>>().join(", ")
+                );
+            }
+        },
+    );
+}
--- a/src/ci/citool/src/main.rs
+++ b/src/ci/citool/src/main.rs
@ -1,25 +1,26 @@
+mod analysis;
 mod cpu_usage;
 mod datadog;
 mod jobs;
-mod merge_report;
 mod metrics;
 mod utils;

-use std::collections::BTreeMap;
+use std::collections::{BTreeMap, HashMap};
 use std::path::{Path, PathBuf};
 use std::process::Command;

+use analysis::output_bootstrap_stats;
 use anyhow::Context;
 use clap::Parser;
 use jobs::JobDatabase;
 use serde_yaml::Value;

+use crate::analysis::output_test_diffs;
 use crate::cpu_usage::load_cpu_usage;
 use crate::datadog::upload_datadog_metric;
 use crate::jobs::RunType;
-use crate::merge_report::post_merge_report;
-use crate::metrics::postprocess_metrics;
-use crate::utils::load_env_var;
+use crate::metrics::{JobMetrics, download_auto_job_metrics, download_job_metrics, load_metrics};
+use crate::utils::{load_env_var, output_details};

 const CI_DIRECTORY: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/..");
 const DOCKER_DIRECTORY: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../docker");
@ -137,6 +138,56 @@ fn upload_ci_metrics(cpu_usage_csv: &Path) -> anyhow::Result<()> {
    Ok(())
 }

+fn postprocess_metrics(
+    metrics_path: PathBuf,
+    parent: Option<String>,
+    job_name: Option<String>,
+) -> anyhow::Result<()> {
+    let metrics = load_metrics(&metrics_path)?;
+    output_bootstrap_stats(&metrics);
+
+    let (Some(parent), Some(job_name)) = (parent, job_name) else {
+        return Ok(());
+    };
+
+    // This command is executed also on PR builds, which might not have parent metrics
+    // available, because some PR jobs don't run on auto builds, and PR jobs do not upload metrics
+    // due to missing permissions.
+    // To avoid having to detect if this is a PR job, and to avoid having failed steps in PR jobs,
+    // we simply print an error if the parent metrics were not found, but otherwise exit
+    // successfully.
+    match download_job_metrics(&job_name, &parent).context("cannot download parent metrics") {
+        Ok(parent_metrics) => {
+            let job_metrics = HashMap::from([(
+                job_name,
+                JobMetrics { parent: Some(parent_metrics), current: metrics },
+            )]);
+            output_test_diffs(job_metrics);
+        }
+        Err(error) => {
+            eprintln!("Metrics for job `{job_name}` and commit `{parent}` not found: {error:?}");
+        }
+    }
+
+    Ok(())
+}
+
+fn post_merge_report(db: JobDatabase, current: String, parent: String) -> anyhow::Result<()> {
+    let metrics = download_auto_job_metrics(&db, &parent, &current)?;
+
+    output_details("What is this?", || {
+        println!(
+            r#"This is an experimental post-merge analysis report that shows differences in
+test outcomes between the merged PR and its parent PR."#
+        );
+    });
+
+    println!("\nComparing {parent} (parent) -> {current} (this PR)\n");
+    output_test_diffs(metrics);
+
+    Ok(())
+}
+
 #[derive(clap::Parser)]
 enum Args {
    /// Calculate a list of jobs that should be executed on CI.
@ -154,13 +205,19 @@ enum Args {
        #[clap(long = "type", default_value = "auto")]
        job_type: JobType,
    },
-    /// Postprocess the metrics.json file generated by bootstrap.
+    /// Postprocess the metrics.json file generated by bootstrap and output
+    /// various statistics.
+    /// If `--parent` and `--job-name` are provided, also display a diff
+    /// against previous metrics that are downloaded from CI.
    PostprocessMetrics {
        /// Path to the metrics.json file
        metrics_path: PathBuf,
-        /// Path to a file where the postprocessed metrics summary will be stored.
-        /// Usually, this will be GITHUB_STEP_SUMMARY on CI.
-        summary_path: PathBuf,
+        /// A parent SHA against which to compare.
+        #[clap(long, requires("job_name"))]
+        parent: Option<String>,
+        /// The name of the current job.
+        #[clap(long, requires("parent"))]
+        job_name: Option<String>,
    },
    /// Upload CI metrics to Datadog.
    UploadBuildMetrics {
@ -211,11 +268,11 @@ fn main() -> anyhow::Result<()> {
        Args::UploadBuildMetrics { cpu_usage_csv } => {
            upload_ci_metrics(&cpu_usage_csv)?;
        }
-        Args::PostprocessMetrics { metrics_path, summary_path } => {
-            postprocess_metrics(&metrics_path, &summary_path)?;
+        Args::PostprocessMetrics { metrics_path, parent, job_name } => {
+            postprocess_metrics(metrics_path, parent, job_name)?;
        }
-        Args::PostMergeReport { current: commit, parent } => {
-            post_merge_report(load_db(default_jobs_file)?, parent, commit)?;
+        Args::PostMergeReport { current, parent } => {
+            post_merge_report(load_db(default_jobs_file)?, current, parent)?;
        }
    }

--- a/src/ci/citool/src/merge_report.rs
+++ b/src/ci/citool/src/merge_report.rs
@ -1,318 +0,0 @@
-use std::collections::{HashMap, HashSet};
-use std::path::PathBuf;
-
-use anyhow::Context;
-use build_helper::metrics::{JsonRoot, TestOutcome, TestSuiteMetadata};
-
-use crate::jobs::JobDatabase;
-use crate::metrics::get_test_suites;
-
-type Sha = String;
-type JobName = String;
-
-/// Computes a post merge CI analysis report between the `parent` and `current` commits.
-pub fn post_merge_report(job_db: JobDatabase, parent: Sha, current: Sha) -> anyhow::Result<()> {
-    let jobs = download_all_metrics(&job_db, &parent, &current)?;
-    let aggregated_test_diffs = aggregate_test_diffs(&jobs)?;
-
-    println!("Comparing {parent} (base) -> {current} (this PR)\n");
-    report_test_diffs(aggregated_test_diffs);
-
-    Ok(())
-}
-
-struct JobMetrics {
-    parent: Option<JsonRoot>,
-    current: JsonRoot,
-}
-
-/// Download before/after metrics for all auto jobs in the job database.
-fn download_all_metrics(
-    job_db: &JobDatabase,
-    parent: &str,
-    current: &str,
-) -> anyhow::Result<HashMap<JobName, JobMetrics>> {
-    let mut jobs = HashMap::default();
-
-    for job in &job_db.auto_jobs {
-        eprintln!("Downloading metrics of job {}", job.name);
-        let metrics_parent = match download_job_metrics(&job.name, parent) {
-            Ok(metrics) => Some(metrics),
-            Err(error) => {
-                eprintln!(
-                    r#"Did not find metrics for job `{}` at `{}`: {error:?}.
-Maybe it was newly added?"#,
-                    job.name, parent
-                );
-                None
-            }
-        };
-        let metrics_current = download_job_metrics(&job.name, current)?;
-        jobs.insert(
-            job.name.clone(),
-            JobMetrics { parent: metrics_parent, current: metrics_current },
-        );
-    }
-    Ok(jobs)
-}
-
-/// Downloads job metrics of the given job for the given commit.
-/// Caches the result on the local disk.
-fn download_job_metrics(job_name: &str, sha: &str) -> anyhow::Result<JsonRoot> {
-    let cache_path = PathBuf::from(".citool-cache").join(sha).join(job_name).join("metrics.json");
-    if let Some(cache_entry) =
-        std::fs::read_to_string(&cache_path).ok().and_then(|data| serde_json::from_str(&data).ok())
-    {
-        return Ok(cache_entry);
-    }
-
-    let url = get_metrics_url(job_name, sha);
-    let mut response = ureq::get(&url).call()?;
-    if !response.status().is_success() {
-        return Err(anyhow::anyhow!(
-            "Cannot fetch metrics from {url}: {}\n{}",
-            response.status(),
-            response.body_mut().read_to_string()?
-        ));
-    }
-    let data: JsonRoot = response
-        .body_mut()
-        .read_json()
-        .with_context(|| anyhow::anyhow!("cannot deserialize metrics from {url}"))?;
-
-    // Ignore errors if cache cannot be created
-    if std::fs::create_dir_all(cache_path.parent().unwrap()).is_ok() {
-        if let Ok(serialized) = serde_json::to_string(&data) {
-            let _ = std::fs::write(&cache_path, &serialized);
-        }
-    }
-    Ok(data)
-}
-
-fn get_metrics_url(job_name: &str, sha: &str) -> String {
-    let suffix = if job_name.ends_with("-alt") { "-alt" } else { "" };
-    format!("https://ci-artifacts.rust-lang.org/rustc-builds{suffix}/{sha}/metrics-{job_name}.json")
-}
-
-/// Represents a difference in the outcome of tests between a base and a current commit.
-/// Maps test diffs to jobs that contained them.
-#[derive(Debug)]
-struct AggregatedTestDiffs {
-    diffs: HashMap<TestDiff, Vec<JobName>>,
-}
-
-fn aggregate_test_diffs(
-    jobs: &HashMap<JobName, JobMetrics>,
-) -> anyhow::Result<AggregatedTestDiffs> {
-    let mut diffs: HashMap<TestDiff, Vec<JobName>> = HashMap::new();
-
-    // Aggregate test suites
-    for (name, metrics) in jobs {
-        if let Some(parent) = &metrics.parent {
-            let tests_parent = aggregate_tests(parent);
-            let tests_current = aggregate_tests(&metrics.current);
-            for diff in calculate_test_diffs(tests_parent, tests_current) {
-                diffs.entry(diff).or_default().push(name.to_string());
-            }
-        }
-    }
-
-    Ok(AggregatedTestDiffs { diffs })
-}
-
-#[derive(Eq, PartialEq, Hash, Debug)]
-enum TestOutcomeDiff {
-    ChangeOutcome { before: TestOutcome, after: TestOutcome },
-    Missing { before: TestOutcome },
-    Added(TestOutcome),
-}
-
-#[derive(Eq, PartialEq, Hash, Debug)]
-struct TestDiff {
-    test: Test,
-    diff: TestOutcomeDiff,
-}
-
-fn calculate_test_diffs(parent: TestSuiteData, current: TestSuiteData) -> HashSet<TestDiff> {
-    let mut diffs = HashSet::new();
-    for (test, outcome) in &current.tests {
-        match parent.tests.get(test) {
-            Some(before) => {
-                if before != outcome {
-                    diffs.insert(TestDiff {
-                        test: test.clone(),
-                        diff: TestOutcomeDiff::ChangeOutcome {
-                            before: before.clone(),
-                            after: outcome.clone(),
-                        },
-                    });
-                }
-            }
-            None => {
-                diffs.insert(TestDiff {
-                    test: test.clone(),
-                    diff: TestOutcomeDiff::Added(outcome.clone()),
-                });
-            }
-        }
-    }
-    for (test, outcome) in &parent.tests {
-        if !current.tests.contains_key(test) {
-            diffs.insert(TestDiff {
-                test: test.clone(),
-                diff: TestOutcomeDiff::Missing { before: outcome.clone() },
-            });
-        }
-    }
-
-    diffs
-}
-
-/// Aggregates test suite executions from all bootstrap invocations in a given CI job.
-#[derive(Default)]
-struct TestSuiteData {
-    tests: HashMap<Test, TestOutcome>,
-}
-
-#[derive(Hash, PartialEq, Eq, Debug, Clone)]
-struct Test {
-    name: String,
-    is_doctest: bool,
-}
-
-/// Extracts all tests from the passed metrics and map them to their outcomes.
-fn aggregate_tests(metrics: &JsonRoot) -> TestSuiteData {
-    let mut tests = HashMap::new();
-    let test_suites = get_test_suites(&metrics);
-    for suite in test_suites {
-        for test in &suite.tests {
-            // Poor man's detection of doctests based on the "(line XYZ)" suffix
-            let is_doctest = matches!(suite.metadata, TestSuiteMetadata::CargoPackage { .. })
-                && test.name.contains("(line");
-            let test_entry = Test { name: normalize_test_name(&test.name), is_doctest };
-            tests.insert(test_entry, test.outcome.clone());
-        }
-    }
-    TestSuiteData { tests }
-}
-
-/// Normalizes Windows-style path delimiters to Unix-style paths.
-fn normalize_test_name(name: &str) -> String {
-    name.replace('\\', "/")
-}
-
-/// Prints test changes in Markdown format to stdout.
-fn report_test_diffs(diff: AggregatedTestDiffs) {
-    println!("## Test differences");
-    if diff.diffs.is_empty() {
-        println!("No test diffs found");
-        return;
-    }
-
-    fn format_outcome(outcome: &TestOutcome) -> String {
-        match outcome {
-            TestOutcome::Passed => "pass".to_string(),
-            TestOutcome::Failed => "fail".to_string(),
-            TestOutcome::Ignored { ignore_reason } => {
-                let reason = match ignore_reason {
-                    Some(reason) => format!(" ({reason})"),
-                    None => String::new(),
-                };
-                format!("ignore{reason}")
-            }
-        }
-    }
-
-    fn format_diff(diff: &TestOutcomeDiff) -> String {
-        match diff {
-            TestOutcomeDiff::ChangeOutcome { before, after } => {
-                format!("{} -> {}", format_outcome(before), format_outcome(after))
-            }
-            TestOutcomeDiff::Missing { before } => {
-                format!("{} -> [missing]", format_outcome(before))
-            }
-            TestOutcomeDiff::Added(outcome) => {
-                format!("[missing] -> {}", format_outcome(outcome))
-            }
-        }
-    }
-
-    fn format_job_group(group: u64) -> String {
-        format!("**J{group}**")
-    }
-
-    // It would be quite noisy to repeat the jobs that contained the test changes after/next to
-    // every test diff. At the same time, grouping the test diffs by
-    // [unique set of jobs that contained them] also doesn't work well, because the test diffs
-    // would have to be duplicated several times.
-    // Instead, we create a set of unique job groups, and then print a job group after each test.
-    // We then print the job groups at the end, as a sort of index.
-    let mut grouped_diffs: Vec<(&TestDiff, u64)> = vec![];
-    let mut job_list_to_group: HashMap<&[JobName], u64> = HashMap::new();
-    let mut job_index: Vec<&[JobName]> = vec![];
-
-    let original_diff_count = diff.diffs.len();
-    let diffs = diff
-        .diffs
-        .into_iter()
-        .filter(|(diff, _)| !diff.test.is_doctest)
-        .map(|(diff, mut jobs)| {
-            jobs.sort();
-            (diff, jobs)
-        })
-        .collect::<Vec<_>>();
-    let doctest_count = original_diff_count.saturating_sub(diffs.len());
-
-    let max_diff_count = 100;
-    for (diff, jobs) in diffs.iter().take(max_diff_count) {
-        let jobs = &*jobs;
-        let job_group = match job_list_to_group.get(jobs.as_slice()) {
-            Some(id) => *id,
-            None => {
-                let id = job_index.len() as u64;
-                job_index.push(jobs);
-                job_list_to_group.insert(jobs, id);
-                id
-            }
-        };
-        grouped_diffs.push((diff, job_group));
-    }
-
-    // Sort diffs by job group and test name
-    grouped_diffs.sort_by(|(d1, g1), (d2, g2)| g1.cmp(&g2).then(d1.test.name.cmp(&d2.test.name)));
-
-    for (diff, job_group) in grouped_diffs {
-        println!(
-            "- `{}`: {} ({})",
-            diff.test.name,
-            format_diff(&diff.diff),
-            format_job_group(job_group)
-        );
-    }
-
-    let extra_diffs = diffs.len().saturating_sub(max_diff_count);
-    if extra_diffs > 0 {
-        println!("\n(and {extra_diffs} additional {})", pluralize("test diff", extra_diffs));
-    }
-
-    if doctest_count > 0 {
-        println!(
-            "\nAdditionally, {doctest_count} doctest {} were found. These are ignored, as they are noisy.",
-            pluralize("diff", doctest_count)
-        );
-    }
-
-    // Now print the job group index
-    println!("\n**Job group index**\n");
-    for (group, jobs) in job_index.into_iter().enumerate() {
-        println!(
-            "- {}: {}",
-            format_job_group(group as u64),
-            jobs.iter().map(|j| format!("`{j}`")).collect::<Vec<_>>().join(", ")
-        );
-    }
-}
-
-fn pluralize(text: &str, count: usize) -> String {
-    if count == 1 { text.to_string() } else { format!("{text}s") }
-}
--- a/src/ci/citool/src/metrics.rs
+++ b/src/ci/citool/src/metrics.rs
@ -1,146 +1,12 @@
-use std::collections::BTreeMap;
-use std::fs::File;
-use std::io::Write;
+use std::collections::HashMap;
 use std::path::Path;

 use anyhow::Context;
-use build_helper::metrics::{
-    BuildStep, JsonNode, JsonRoot, TestOutcome, TestSuite, TestSuiteMetadata, format_build_steps,
-};
+use build_helper::metrics::{JsonNode, JsonRoot, TestSuite};

-pub fn postprocess_metrics(metrics_path: &Path, summary_path: &Path) -> anyhow::Result<()> {
-    let metrics = load_metrics(metrics_path)?;
+use crate::jobs::JobDatabase;

-    let mut file = File::options()
-        .append(true)
-        .create(true)
-        .open(summary_path)
-        .with_context(|| format!("Cannot open summary file at {summary_path:?}"))?;
-
-    if !metrics.invocations.is_empty() {
-        writeln!(file, "# Bootstrap steps")?;
-        record_bootstrap_step_durations(&metrics, &mut file)?;
-        record_test_suites(&metrics, &mut file)?;
-    }
-
-    Ok(())
-}
-
-fn record_bootstrap_step_durations(metrics: &JsonRoot, file: &mut File) -> anyhow::Result<()> {
-    for invocation in &metrics.invocations {
-        let step = BuildStep::from_invocation(invocation);
-        let table = format_build_steps(&step);
-        eprintln!("Step `{}`\n{table}\n", invocation.cmdline);
-        writeln!(
-            file,
-            r"<details>
-<summary>{}</summary>
-<pre><code>{table}</code></pre>
-</details>
-",
-            invocation.cmdline
-        )?;
-    }
-    eprintln!("Recorded {} bootstrap invocation(s)", metrics.invocations.len());
-
-    Ok(())
-}
-
-fn record_test_suites(metrics: &JsonRoot, file: &mut File) -> anyhow::Result<()> {
-    let suites = get_test_suites(&metrics);
-
-    if !suites.is_empty() {
-        let aggregated = aggregate_test_suites(&suites);
-        let table = render_table(aggregated);
-        writeln!(file, "\n# Test results\n")?;
-        writeln!(file, "{table}")?;
-    } else {
-        eprintln!("No test suites found in metrics");
-    }
-
-    Ok(())
-}
-
-fn render_table(suites: BTreeMap<String, TestSuiteRecord>) -> String {
-    use std::fmt::Write;
-
-    let mut table = "| Test suite | Passed ✅ | Ignored 🚫 | Failed  ❌ |\n".to_string();
-    writeln!(table, "|:------|------:|------:|------:|").unwrap();
-
-    fn compute_pct(value: f64, total: f64) -> f64 {
-        if total == 0.0 { 0.0 } else { value / total }
-    }
-
-    fn write_row(
-        buffer: &mut String,
-        name: &str,
-        record: &TestSuiteRecord,
-        surround: &str,
-    ) -> std::fmt::Result {
-        let TestSuiteRecord { passed, ignored, failed } = record;
-        let total = (record.passed + record.ignored + record.failed) as f64;
-        let passed_pct = compute_pct(*passed as f64, total) * 100.0;
-        let ignored_pct = compute_pct(*ignored as f64, total) * 100.0;
-        let failed_pct = compute_pct(*failed as f64, total) * 100.0;
-
-        write!(buffer, "| {surround}{name}{surround} |")?;
-        write!(buffer, " {surround}{passed} ({passed_pct:.0}%){surround} |")?;
-        write!(buffer, " {surround}{ignored} ({ignored_pct:.0}%){surround} |")?;
-        writeln!(buffer, " {surround}{failed} ({failed_pct:.0}%){surround} |")?;
-
-        Ok(())
-    }
-
-    let mut total = TestSuiteRecord::default();
-    for (name, record) in suites {
-        write_row(&mut table, &name, &record, "").unwrap();
-        total.passed += record.passed;
-        total.ignored += record.ignored;
-        total.failed += record.failed;
-    }
-    write_row(&mut table, "Total", &total, "**").unwrap();
-    table
-}
-
-#[derive(Default)]
-struct TestSuiteRecord {
-    passed: u64,
-    ignored: u64,
-    failed: u64,
-}
-
-fn test_metadata_name(metadata: &TestSuiteMetadata) -> String {
-    match metadata {
-        TestSuiteMetadata::CargoPackage { crates, stage, .. } => {
-            format!("{} (stage {stage})", crates.join(", "))
-        }
-        TestSuiteMetadata::Compiletest { suite, stage, .. } => {
-            format!("{suite} (stage {stage})")
-        }
-    }
-}
-
-fn aggregate_test_suites(suites: &[&TestSuite]) -> BTreeMap<String, TestSuiteRecord> {
-    let mut records: BTreeMap<String, TestSuiteRecord> = BTreeMap::new();
-    for suite in suites {
-        let name = test_metadata_name(&suite.metadata);
-        let record = records.entry(name).or_default();
-        for test in &suite.tests {
-            match test.outcome {
-                TestOutcome::Passed => {
-                    record.passed += 1;
-                }
-                TestOutcome::Failed => {
-                    record.failed += 1;
-                }
-                TestOutcome::Ignored { .. } => {
-                    record.ignored += 1;
-                }
-            }
-        }
-    }
-    records
-}
+pub type JobName = String;

 pub fn get_test_suites(metrics: &JsonRoot) -> Vec<&TestSuite> {
    fn visit_test_suites<'a>(nodes: &'a [JsonNode], suites: &mut Vec<&'a TestSuite>) {
@ -163,10 +29,68 @@ pub fn get_test_suites(metrics: &JsonRoot) -> Vec<&TestSuite> {
    suites
 }

-fn load_metrics(path: &Path) -> anyhow::Result<JsonRoot> {
+pub fn load_metrics(path: &Path) -> anyhow::Result<JsonRoot> {
    let metrics = std::fs::read_to_string(path)
        .with_context(|| format!("Cannot read JSON metrics from {path:?}"))?;
    let metrics: JsonRoot = serde_json::from_str(&metrics)
        .with_context(|| format!("Cannot deserialize JSON metrics from {path:?}"))?;
    Ok(metrics)
 }
+
+pub struct JobMetrics {
+    pub parent: Option<JsonRoot>,
+    pub current: JsonRoot,
+}
+
+/// Download before/after metrics for all auto jobs in the job database.
+/// `parent` and `current` should be commit SHAs.
+pub fn download_auto_job_metrics(
+    job_db: &JobDatabase,
+    parent: &str,
+    current: &str,
+) -> anyhow::Result<HashMap<JobName, JobMetrics>> {
+    let mut jobs = HashMap::default();
+
+    for job in &job_db.auto_jobs {
+        eprintln!("Downloading metrics of job {}", job.name);
+        let metrics_parent = match download_job_metrics(&job.name, parent) {
+            Ok(metrics) => Some(metrics),
+            Err(error) => {
+                eprintln!(
+                    r#"Did not find metrics for job `{}` at `{parent}`: {error:?}.
+Maybe it was newly added?"#,
+                    job.name
+                );
+                None
+            }
+        };
+        let metrics_current = download_job_metrics(&job.name, current)?;
+        jobs.insert(
+            job.name.clone(),
+            JobMetrics { parent: metrics_parent, current: metrics_current },
+        );
+    }
+    Ok(jobs)
+}
+
+pub fn download_job_metrics(job_name: &str, sha: &str) -> anyhow::Result<JsonRoot> {
+    let url = get_metrics_url(job_name, sha);
+    let mut response = ureq::get(&url).call()?;
+    if !response.status().is_success() {
+        return Err(anyhow::anyhow!(
+            "Cannot fetch metrics from {url}: {}\n{}",
+            response.status(),
+            response.body_mut().read_to_string()?
+        ));
+    }
+    let data: JsonRoot = response
+        .body_mut()
+        .read_json()
+        .with_context(|| anyhow::anyhow!("cannot deserialize metrics from {url}"))?;
+    Ok(data)
+}
+
+fn get_metrics_url(job_name: &str, sha: &str) -> String {
+    let suffix = if job_name.ends_with("-alt") { "-alt" } else { "" };
+    format!("https://ci-artifacts.rust-lang.org/rustc-builds{suffix}/{sha}/metrics-{job_name}.json")
+}
--- a/src/ci/citool/src/utils.rs
+++ b/src/ci/citool/src/utils.rs
@ -9,3 +9,23 @@ pub fn load_env_var(name: &str) -> anyhow::Result<String> {
 pub fn read_to_string<P: AsRef<Path>>(path: P) -> anyhow::Result<String> {
    std::fs::read_to_string(&path).with_context(|| format!("Cannot read file {:?}", path.as_ref()))
 }
+
+pub fn pluralize(text: &str, count: usize) -> String {
+    if count == 1 { text.to_string() } else { format!("{text}s") }
+}
+
+/// Outputs a HTML <details> section with the provided summary.
+/// Output printed by `func` will be contained within the section.
+pub fn output_details<F>(summary: &str, func: F)
+where
+    F: FnOnce(),
+{
+    println!(
+        r"<details>
+<summary>{summary}</summary>
+
+"
+    );
+    func();
+    println!("</details>\n");
+}