Rollup merge of #104439 - ferrocene:pa-generate-copyright, r=pnkfelix
Add prototype to generate `COPYRIGHT` from REUSE metadata This PR adds a prototype to generate the `COPYRIGHT` file from the metadata gathered with REUSE. There are two new tools: * `src/tools/collect-license-metadata` invokes REUSE, parses its output and stores a concise JSON representation of the metadata in `src/etc/license-metadata.json`. * `src/tools/generate-copyright` parses the metadata generated above, (in the future will) gather crate dependencies metadata, and renders the `COPYRIGHT.md` file. Note that since the contents of those files are currently incorrect, rather than outputting in the paths above, the files will be stored in `build/` and not committed. This will be changed once we're confident about the metadata. Eventually, `src/etc/license-metadata.json` will be committed into the repository and verified to be up to date by CI (similar to our GitHub Actions configuration), to avoid having people install REUSE on their local machine in most cases. You can see the (incorrect) generated files in https://gist.github.com/pietroalbini/3f3f22b6f9cc8533abf7494b6a50cf97. r? `@pnkfelix`
This commit is contained in:
commit
e5a01b97ee
15 changed files with 719 additions and 0 deletions
77
Cargo.lock
77
Cargo.lock
|
@ -581,6 +581,7 @@ dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
"num-integer",
|
"num-integer",
|
||||||
"num-traits",
|
"num-traits",
|
||||||
|
"serde",
|
||||||
"time",
|
"time",
|
||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
@ -730,6 +731,16 @@ dependencies = [
|
||||||
"rustc-semver",
|
"rustc-semver",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "collect-license-metadata"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"serde",
|
||||||
|
"serde_json",
|
||||||
|
"spdx-rs",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "color-eyre"
|
name = "color-eyre"
|
||||||
version = "0.6.2"
|
version = "0.6.2"
|
||||||
|
@ -1552,6 +1563,15 @@ dependencies = [
|
||||||
"termcolor",
|
"termcolor",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "generate-copyright"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"serde",
|
||||||
|
"serde_json",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "generic-array"
|
name = "generic-array"
|
||||||
version = "0.14.4"
|
version = "0.14.4"
|
||||||
|
@ -4864,6 +4884,35 @@ dependencies = [
|
||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "spdx-expression"
|
||||||
|
version = "0.5.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "53d7ac03c67c572d85049d6db815e20a4a19b41b3d5cca732ac582342021ad77"
|
||||||
|
dependencies = [
|
||||||
|
"nom",
|
||||||
|
"serde",
|
||||||
|
"thiserror",
|
||||||
|
"tracing",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "spdx-rs"
|
||||||
|
version = "0.5.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b3c02f6eb7e7b4100c272f685a9ccaccaab302324e8c7ec3e2ee72340fb29ff3"
|
||||||
|
dependencies = [
|
||||||
|
"chrono",
|
||||||
|
"log",
|
||||||
|
"nom",
|
||||||
|
"serde",
|
||||||
|
"spdx-expression",
|
||||||
|
"strum",
|
||||||
|
"strum_macros",
|
||||||
|
"thiserror",
|
||||||
|
"uuid",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "stable_deref_trait"
|
name = "stable_deref_trait"
|
||||||
version = "1.2.0"
|
version = "1.2.0"
|
||||||
|
@ -4967,6 +5016,25 @@ version = "0.10.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "strum"
|
||||||
|
version = "0.24.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "strum_macros"
|
||||||
|
version = "0.24.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
|
||||||
|
dependencies = [
|
||||||
|
"heck",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"rustversion",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "syn"
|
name = "syn"
|
||||||
version = "1.0.102"
|
version = "1.0.102"
|
||||||
|
@ -5596,6 +5664,15 @@ version = "0.1.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8772a4ccbb4e89959023bc5b7cb8623a795caa7092d99f3aa9501b9484d4557d"
|
checksum = "8772a4ccbb4e89959023bc5b7cb8623a795caa7092d99f3aa9501b9484d4557d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "uuid"
|
||||||
|
version = "0.8.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7"
|
||||||
|
dependencies = [
|
||||||
|
"getrandom 0.2.0",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "valuable"
|
name = "valuable"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
|
|
@ -39,6 +39,8 @@ members = [
|
||||||
"src/tools/bump-stage0",
|
"src/tools/bump-stage0",
|
||||||
"src/tools/replace-version-placeholder",
|
"src/tools/replace-version-placeholder",
|
||||||
"src/tools/lld-wrapper",
|
"src/tools/lld-wrapper",
|
||||||
|
"src/tools/collect-license-metadata",
|
||||||
|
"src/tools/generate-copyright",
|
||||||
]
|
]
|
||||||
|
|
||||||
exclude = [
|
exclude = [
|
||||||
|
|
|
@ -255,6 +255,16 @@ changelog-seen = 2
|
||||||
# Defaults to the Python interpreter used to execute x.py
|
# Defaults to the Python interpreter used to execute x.py
|
||||||
#python = "python"
|
#python = "python"
|
||||||
|
|
||||||
|
# The path to the REUSE executable to use. Note that REUSE is not required in
|
||||||
|
# most cases, as our tooling relies on a cached (and shrunk) copy of the
|
||||||
|
# REUSE output present in the git repository and in our source tarballs.
|
||||||
|
#
|
||||||
|
# REUSE is only needed if your changes caused the overall licensing of the
|
||||||
|
# repository to change, and the cached copy has to be regenerated.
|
||||||
|
#
|
||||||
|
# Defaults to the "reuse" command in the system path.
|
||||||
|
#reuse = "reuse"
|
||||||
|
|
||||||
# Force Cargo to check that Cargo.lock describes the precise dependency
|
# Force Cargo to check that Cargo.lock describes the precise dependency
|
||||||
# set that all the Cargo.toml files create, instead of updating it.
|
# set that all the Cargo.toml files create, instead of updating it.
|
||||||
#locked-deps = false
|
#locked-deps = false
|
||||||
|
|
|
@ -754,6 +754,8 @@ impl<'a> Builder<'a> {
|
||||||
run::BumpStage0,
|
run::BumpStage0,
|
||||||
run::ReplaceVersionPlaceholder,
|
run::ReplaceVersionPlaceholder,
|
||||||
run::Miri,
|
run::Miri,
|
||||||
|
run::CollectLicenseMetadata,
|
||||||
|
run::GenerateCopyright,
|
||||||
),
|
),
|
||||||
// These commands either don't use paths, or they're special-cased in Build::build()
|
// These commands either don't use paths, or they're special-cased in Build::build()
|
||||||
Kind::Clean | Kind::Format | Kind::Setup => vec![],
|
Kind::Clean | Kind::Format | Kind::Setup => vec![],
|
||||||
|
|
|
@ -213,6 +213,7 @@ pub struct Config {
|
||||||
pub npm: Option<PathBuf>,
|
pub npm: Option<PathBuf>,
|
||||||
pub gdb: Option<PathBuf>,
|
pub gdb: Option<PathBuf>,
|
||||||
pub python: Option<PathBuf>,
|
pub python: Option<PathBuf>,
|
||||||
|
pub reuse: Option<PathBuf>,
|
||||||
pub cargo_native_static: bool,
|
pub cargo_native_static: bool,
|
||||||
pub configure_args: Vec<String>,
|
pub configure_args: Vec<String>,
|
||||||
|
|
||||||
|
@ -611,6 +612,7 @@ define_config! {
|
||||||
nodejs: Option<String> = "nodejs",
|
nodejs: Option<String> = "nodejs",
|
||||||
npm: Option<String> = "npm",
|
npm: Option<String> = "npm",
|
||||||
python: Option<String> = "python",
|
python: Option<String> = "python",
|
||||||
|
reuse: Option<String> = "reuse",
|
||||||
locked_deps: Option<bool> = "locked-deps",
|
locked_deps: Option<bool> = "locked-deps",
|
||||||
vendor: Option<bool> = "vendor",
|
vendor: Option<bool> = "vendor",
|
||||||
full_bootstrap: Option<bool> = "full-bootstrap",
|
full_bootstrap: Option<bool> = "full-bootstrap",
|
||||||
|
@ -1004,6 +1006,7 @@ impl Config {
|
||||||
config.npm = build.npm.map(PathBuf::from);
|
config.npm = build.npm.map(PathBuf::from);
|
||||||
config.gdb = build.gdb.map(PathBuf::from);
|
config.gdb = build.gdb.map(PathBuf::from);
|
||||||
config.python = build.python.map(PathBuf::from);
|
config.python = build.python.map(PathBuf::from);
|
||||||
|
config.reuse = build.reuse.map(PathBuf::from);
|
||||||
config.submodules = build.submodules;
|
config.submodules = build.submodules;
|
||||||
set(&mut config.low_priority, build.low_priority);
|
set(&mut config.low_priority, build.low_priority);
|
||||||
set(&mut config.compiler_docs, build.compiler_docs);
|
set(&mut config.compiler_docs, build.compiler_docs);
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
use std::path::PathBuf;
|
||||||
use std::process::Command;
|
use std::process::Command;
|
||||||
|
|
||||||
use crate::builder::{Builder, RunConfig, ShouldRun, Step};
|
use crate::builder::{Builder, RunConfig, ShouldRun, Step};
|
||||||
|
@ -189,3 +190,65 @@ impl Step for Miri {
|
||||||
builder.run(&mut miri);
|
builder.run(&mut miri);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialOrd, Ord, Copy, Clone, Hash, PartialEq, Eq)]
|
||||||
|
pub struct CollectLicenseMetadata;
|
||||||
|
|
||||||
|
impl Step for CollectLicenseMetadata {
|
||||||
|
type Output = PathBuf;
|
||||||
|
const ONLY_HOSTS: bool = true;
|
||||||
|
|
||||||
|
fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
|
||||||
|
run.path("src/tools/collect-license-metadata")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn make_run(run: RunConfig<'_>) {
|
||||||
|
run.builder.ensure(CollectLicenseMetadata);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run(self, builder: &Builder<'_>) -> Self::Output {
|
||||||
|
let Some(reuse) = &builder.config.reuse else {
|
||||||
|
panic!("REUSE is required to collect the license metadata");
|
||||||
|
};
|
||||||
|
|
||||||
|
// Temporary location, it will be moved to src/etc once it's accurate.
|
||||||
|
let dest = builder.out.join("license-metadata.json");
|
||||||
|
|
||||||
|
let mut cmd = builder.tool_cmd(Tool::CollectLicenseMetadata);
|
||||||
|
cmd.env("REUSE_EXE", reuse);
|
||||||
|
cmd.env("DEST", &dest);
|
||||||
|
builder.run(&mut cmd);
|
||||||
|
|
||||||
|
dest
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialOrd, Ord, Copy, Clone, Hash, PartialEq, Eq)]
|
||||||
|
pub struct GenerateCopyright;
|
||||||
|
|
||||||
|
impl Step for GenerateCopyright {
|
||||||
|
type Output = PathBuf;
|
||||||
|
const ONLY_HOSTS: bool = true;
|
||||||
|
|
||||||
|
fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
|
||||||
|
run.path("src/tools/generate-copyright")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn make_run(run: RunConfig<'_>) {
|
||||||
|
run.builder.ensure(GenerateCopyright);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run(self, builder: &Builder<'_>) -> Self::Output {
|
||||||
|
let license_metadata = builder.ensure(CollectLicenseMetadata);
|
||||||
|
|
||||||
|
// Temporary location, it will be moved to the proper one once it's accurate.
|
||||||
|
let dest = builder.out.join("COPYRIGHT.md");
|
||||||
|
|
||||||
|
let mut cmd = builder.tool_cmd(Tool::GenerateCopyright);
|
||||||
|
cmd.env("LICENSE_METADATA", &license_metadata);
|
||||||
|
cmd.env("DEST", &dest);
|
||||||
|
builder.run(&mut cmd);
|
||||||
|
|
||||||
|
dest
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -140,6 +140,13 @@ than building it.
|
||||||
.map(|p| cmd_finder.must_have(p))
|
.map(|p| cmd_finder.must_have(p))
|
||||||
.or_else(|| cmd_finder.maybe_have("gdb"));
|
.or_else(|| cmd_finder.maybe_have("gdb"));
|
||||||
|
|
||||||
|
build.config.reuse = build
|
||||||
|
.config
|
||||||
|
.reuse
|
||||||
|
.take()
|
||||||
|
.map(|p| cmd_finder.must_have(p))
|
||||||
|
.or_else(|| cmd_finder.maybe_have("reuse"));
|
||||||
|
|
||||||
// We're gonna build some custom C code here and there, host triples
|
// We're gonna build some custom C code here and there, host triples
|
||||||
// also build some C++ shims for LLVM so we need a C++ compiler.
|
// also build some C++ shims for LLVM so we need a C++ compiler.
|
||||||
for target in &build.targets {
|
for target in &build.targets {
|
||||||
|
|
|
@ -380,6 +380,8 @@ bootstrap_tool!(
|
||||||
HtmlChecker, "src/tools/html-checker", "html-checker";
|
HtmlChecker, "src/tools/html-checker", "html-checker";
|
||||||
BumpStage0, "src/tools/bump-stage0", "bump-stage0";
|
BumpStage0, "src/tools/bump-stage0", "bump-stage0";
|
||||||
ReplaceVersionPlaceholder, "src/tools/replace-version-placeholder", "replace-version-placeholder";
|
ReplaceVersionPlaceholder, "src/tools/replace-version-placeholder", "replace-version-placeholder";
|
||||||
|
CollectLicenseMetadata, "src/tools/collect-license-metadata", "collect-license-metadata";
|
||||||
|
GenerateCopyright, "src/tools/generate-copyright", "generate-copyright";
|
||||||
);
|
);
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)]
|
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)]
|
||||||
|
|
10
src/tools/collect-license-metadata/Cargo.toml
Normal file
10
src/tools/collect-license-metadata/Cargo.toml
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
[package]
|
||||||
|
name = "collect-license-metadata"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1.0.65"
|
||||||
|
serde = { version = "1.0.147", features = ["derive"] }
|
||||||
|
serde_json = "1.0.85"
|
||||||
|
spdx-rs = "0.5.1"
|
65
src/tools/collect-license-metadata/src/licenses.rs
Normal file
65
src/tools/collect-license-metadata/src/licenses.rs
Normal file
|
@ -0,0 +1,65 @@
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
/// Markers that can precede the copyright holder in a copyright line. They are stripped
/// (repeatedly, in any order) by `License::remove_copyright_prefixes`.
const COPYRIGHT_PREFIXES: &[&str] = &["SPDX-FileCopyrightText:", "Copyright", "(c)", "(C)", "©"];
|
||||||
|
|
||||||
|
pub(crate) struct LicensesInterner {
|
||||||
|
by_id: Vec<License>,
|
||||||
|
by_struct: HashMap<License, usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LicensesInterner {
|
||||||
|
pub(crate) fn new() -> Self {
|
||||||
|
LicensesInterner { by_id: Vec::new(), by_struct: HashMap::new() }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn intern(&mut self, mut license: License) -> LicenseId {
|
||||||
|
license.simplify();
|
||||||
|
if let Some(id) = self.by_struct.get(&license) {
|
||||||
|
LicenseId(*id)
|
||||||
|
} else {
|
||||||
|
let id = self.by_id.len();
|
||||||
|
self.by_id.push(license.clone());
|
||||||
|
self.by_struct.insert(license, id);
|
||||||
|
LicenseId(id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn resolve(&self, id: LicenseId) -> &License {
|
||||||
|
&self.by_id[id.0]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Handle to a license stored in a `LicensesInterner`: it wraps the index of the license in the
/// interner's `by_id` vector. `#[serde(transparent)]` makes it serialize as that bare number.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, serde::Serialize)]
|
||||||
|
#[serde(transparent)]
|
||||||
|
pub(crate) struct LicenseId(usize);
|
||||||
|
|
||||||
|
#[derive(Clone, Hash, PartialEq, Eq, serde::Serialize)]
|
||||||
|
pub(crate) struct License {
|
||||||
|
pub(crate) spdx: String,
|
||||||
|
pub(crate) copyright: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl License {
|
||||||
|
fn simplify(&mut self) {
|
||||||
|
self.remove_copyright_prefixes();
|
||||||
|
self.copyright.sort();
|
||||||
|
self.copyright.dedup();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn remove_copyright_prefixes(&mut self) {
|
||||||
|
for copyright in &mut self.copyright {
|
||||||
|
let mut stripped = copyright.trim();
|
||||||
|
let mut previous_stripped;
|
||||||
|
loop {
|
||||||
|
previous_stripped = stripped;
|
||||||
|
for pattern in COPYRIGHT_PREFIXES {
|
||||||
|
stripped = stripped.trim_start_matches(pattern).trim_start();
|
||||||
|
}
|
||||||
|
if stripped == previous_stripped {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*copyright = stripped.into();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
30
src/tools/collect-license-metadata/src/main.rs
Normal file
30
src/tools/collect-license-metadata/src/main.rs
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
mod licenses;
|
||||||
|
mod path_tree;
|
||||||
|
mod reuse;
|
||||||
|
|
||||||
|
use crate::licenses::LicensesInterner;
|
||||||
|
use anyhow::Error;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
fn main() -> Result<(), Error> {
|
||||||
|
let reuse_exe: PathBuf = std::env::var_os("REUSE_EXE").expect("Missing REUSE_EXE").into();
|
||||||
|
let dest: PathBuf = std::env::var_os("DEST").expect("Missing DEST").into();
|
||||||
|
|
||||||
|
let mut interner = LicensesInterner::new();
|
||||||
|
let paths = crate::reuse::collect(&reuse_exe, &mut interner)?;
|
||||||
|
|
||||||
|
let mut tree = crate::path_tree::build(paths);
|
||||||
|
tree.simplify();
|
||||||
|
|
||||||
|
if let Some(parent) = dest.parent() {
|
||||||
|
std::fs::create_dir_all(parent)?;
|
||||||
|
}
|
||||||
|
std::fs::write(
|
||||||
|
&dest,
|
||||||
|
&serde_json::to_vec_pretty(&serde_json::json!({
|
||||||
|
"files": crate::path_tree::expand_interned_licenses(tree, &interner),
|
||||||
|
}))?,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
294
src/tools/collect-license-metadata/src/path_tree.rs
Normal file
294
src/tools/collect-license-metadata/src/path_tree.rs
Normal file
|
@ -0,0 +1,294 @@
|
||||||
|
//! Tools like REUSE output per-file licensing information, but we need to condense it in the
|
||||||
|
//! minimum amount of data that still represents the same licensing metadata. This module is
|
||||||
|
//! responsible for that, by turning the list of paths into a tree and executing simplification
|
||||||
|
//! passes over the tree to remove redundant information.
|
||||||
|
|
||||||
|
use crate::licenses::{License, LicenseId, LicensesInterner};
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
|
/// A node of the licensing tree, generic over how a license is represented (`L`).
///
/// Serialized as an internally tagged enum: the variant name goes, in kebab-case, into a `type`
/// field.
#[derive(serde::Serialize)]
|
||||||
|
#[serde(rename_all = "kebab-case", tag = "type")]
|
||||||
|
pub(crate) enum Node<L> {
|
||||||
|
// Top of the tree; `build()` returns this variant.
Root { childs: Vec<Node<L>> },
|
||||||
|
// `license` starts as `None`; `collapse_in_licensed_directories` sets it to the most common
// license among the children and drops the children carrying that license.
Directory { name: PathBuf, childs: Vec<Node<L>>, license: Option<L> },
|
||||||
|
// A single file and its license.
File { name: PathBuf, license: L },
|
||||||
|
// Several files sharing one license; created by `merge_file_groups`.
FileGroup { names: Vec<PathBuf>, license: L },
|
||||||
|
// Placeholder for a node marked for deletion; dropped by `remove_empty`.
Empty,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Node<LicenseId> {
|
||||||
|
pub(crate) fn simplify(&mut self) {
|
||||||
|
self.merge_directories();
|
||||||
|
self.collapse_in_licensed_directories();
|
||||||
|
self.merge_directory_licenses();
|
||||||
|
self.merge_file_groups();
|
||||||
|
self.remove_empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Initially, the build() function constructs a list of separate paths from the file
|
||||||
|
/// system root down to each file, like so:
|
||||||
|
///
|
||||||
|
/// ```text
|
||||||
|
/// ┌─► ./ ──► compiler/ ──► rustc/ ──► src/ ──► main.rs
|
||||||
|
/// │
|
||||||
|
/// <root> ─┼─► ./ ──► compiler/ ──► rustc/ ──► Cargo.toml
|
||||||
|
/// │
|
||||||
|
/// └─► ./ ──► library/ ───► std/ ──► Cargo.toml
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// This pass is responsible for turning that into a proper directory tree:
|
||||||
|
///
|
||||||
|
/// ```text
|
||||||
|
/// ┌─► compiler/ ──► rustc/ ──┬─► src/ ──► main.rs
|
||||||
|
/// │ │
|
||||||
|
/// <root> ──► ./ ──┤ └─► Cargo.toml
|
||||||
|
/// │
|
||||||
|
/// └─► library/ ───► std/ ──► Cargo.toml
|
||||||
|
/// ```
|
||||||
|
fn merge_directories(&mut self) {
|
||||||
|
match self {
|
||||||
|
Node::Root { childs } | Node::Directory { childs, license: None, .. } => {
|
||||||
|
let mut directories = BTreeMap::new();
|
||||||
|
let mut files = Vec::new();
|
||||||
|
|
||||||
|
for child in childs.drain(..) {
|
||||||
|
match child {
|
||||||
|
Node::Directory { name, mut childs, license: None } => {
|
||||||
|
directories.entry(name).or_insert_with(Vec::new).append(&mut childs);
|
||||||
|
}
|
||||||
|
file @ Node::File { .. } => {
|
||||||
|
files.push(file);
|
||||||
|
}
|
||||||
|
Node::Empty => {}
|
||||||
|
Node::Root { .. } => {
|
||||||
|
panic!("can't have a root inside another element");
|
||||||
|
}
|
||||||
|
Node::FileGroup { .. } => {
|
||||||
|
panic!("FileGroup should not be present at this stage");
|
||||||
|
}
|
||||||
|
Node::Directory { license: Some(_), .. } => {
|
||||||
|
panic!("license should not be set at this stage");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
childs.extend(directories.into_iter().map(|(name, childs)| Node::Directory {
|
||||||
|
name,
|
||||||
|
childs,
|
||||||
|
license: None,
|
||||||
|
}));
|
||||||
|
childs.append(&mut files);
|
||||||
|
|
||||||
|
for child in &mut *childs {
|
||||||
|
child.merge_directories();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::Empty => {}
|
||||||
|
Node::File { .. } => {}
|
||||||
|
Node::FileGroup { .. } => {
|
||||||
|
panic!("FileGroup should not be present at this stage");
|
||||||
|
}
|
||||||
|
Node::Directory { license: Some(_), .. } => {
|
||||||
|
panic!("license should not be set at this stage");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// In our codebase, most files in a directory have the same license as the other files in that
|
||||||
|
/// same directory, so it's redundant to store licensing metadata for all the files. Instead,
|
||||||
|
/// we can add a license for a whole directory, and only record the exceptions to a directory
|
||||||
|
/// licensing metadata.
|
||||||
|
///
|
||||||
|
/// We cannot instead record only the difference to Rust's standard licensing, as the majority
|
||||||
|
/// of the files in our repository are *not* licensed under Rust's standard licensing due to
|
||||||
|
/// our inclusion of LLVM.
|
||||||
|
fn collapse_in_licensed_directories(&mut self) {
|
||||||
|
match self {
|
||||||
|
Node::Directory { childs, license, .. } => {
|
||||||
|
for child in &mut *childs {
|
||||||
|
child.collapse_in_licensed_directories();
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut licenses_count = BTreeMap::new();
|
||||||
|
for child in &*childs {
|
||||||
|
let Some(license) = child.license() else { continue };
|
||||||
|
*licenses_count.entry(license).or_insert(0) += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let most_popular_license = licenses_count
|
||||||
|
.into_iter()
|
||||||
|
.max_by_key(|(_, count)| *count)
|
||||||
|
.map(|(license, _)| license);
|
||||||
|
|
||||||
|
if let Some(most_popular_license) = most_popular_license {
|
||||||
|
childs.retain(|child| child.license() != Some(most_popular_license));
|
||||||
|
*license = Some(most_popular_license);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::Root { childs } => {
|
||||||
|
for child in &mut *childs {
|
||||||
|
child.collapse_in_licensed_directories();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::File { .. } => {}
|
||||||
|
Node::FileGroup { .. } => {}
|
||||||
|
Node::Empty => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reduce the depth of the tree by merging subdirectories with the same license as their
|
||||||
|
/// parent directory into their parent, and adjusting the paths of the childs accordingly.
|
||||||
|
fn merge_directory_licenses(&mut self) {
|
||||||
|
match self {
|
||||||
|
Node::Root { childs } => {
|
||||||
|
for child in &mut *childs {
|
||||||
|
child.merge_directory_licenses();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::Directory { childs, license, .. } => {
|
||||||
|
let mut to_add = Vec::new();
|
||||||
|
for child in &mut *childs {
|
||||||
|
child.merge_directory_licenses();
|
||||||
|
|
||||||
|
let Node::Directory {
|
||||||
|
name: child_name,
|
||||||
|
childs: child_childs,
|
||||||
|
license: child_license,
|
||||||
|
} = child else { continue };
|
||||||
|
|
||||||
|
if child_license != license {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
for mut child_child in child_childs.drain(..) {
|
||||||
|
match &mut child_child {
|
||||||
|
Node::Root { .. } => {
|
||||||
|
panic!("can't have a root inside another element");
|
||||||
|
}
|
||||||
|
Node::FileGroup { .. } => {
|
||||||
|
panic!("FileGroup should not be present at this stage");
|
||||||
|
}
|
||||||
|
Node::Directory { name: child_child_name, .. } => {
|
||||||
|
*child_child_name = child_name.join(&child_child_name);
|
||||||
|
}
|
||||||
|
Node::File { name: child_child_name, .. } => {
|
||||||
|
*child_child_name = child_name.join(&child_child_name);
|
||||||
|
}
|
||||||
|
Node::Empty => {}
|
||||||
|
}
|
||||||
|
to_add.push(child_child);
|
||||||
|
}
|
||||||
|
|
||||||
|
*child = Node::Empty;
|
||||||
|
}
|
||||||
|
childs.append(&mut to_add);
|
||||||
|
}
|
||||||
|
Node::Empty => {}
|
||||||
|
Node::File { .. } => {}
|
||||||
|
Node::FileGroup { .. } => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This pass groups multiple files in a directory with the same license into a single
|
||||||
|
/// "FileGroup", so that the license of all those files can be reported as a group.
|
||||||
|
///
|
||||||
|
/// Crucially this pass runs after collapse_in_licensed_directories, so the most common license
|
||||||
|
/// will already be marked as the directory's license and won't be turned into a group.
|
||||||
|
fn merge_file_groups(&mut self) {
|
||||||
|
match self {
|
||||||
|
Node::Root { childs } | Node::Directory { childs, .. } => {
|
||||||
|
let mut grouped = BTreeMap::new();
|
||||||
|
|
||||||
|
for child in &mut *childs {
|
||||||
|
child.merge_file_groups();
|
||||||
|
if let Node::File { name, license } = child {
|
||||||
|
grouped.entry(*license).or_insert_with(Vec::new).push(name.clone());
|
||||||
|
*child = Node::Empty;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (license, mut names) in grouped.into_iter() {
|
||||||
|
if names.len() == 1 {
|
||||||
|
childs.push(Node::File { license, name: names.pop().unwrap() });
|
||||||
|
} else {
|
||||||
|
childs.push(Node::FileGroup { license, names });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::File { .. } => {}
|
||||||
|
Node::FileGroup { .. } => panic!("FileGroup should not be present at this stage"),
|
||||||
|
Node::Empty => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Some nodes were replaced with Node::Empty to mark them for deletion. As the last step, make
|
||||||
|
/// sure to remove them from the tree.
|
||||||
|
fn remove_empty(&mut self) {
|
||||||
|
match self {
|
||||||
|
Node::Root { childs } | Node::Directory { childs, .. } => {
|
||||||
|
for child in &mut *childs {
|
||||||
|
child.remove_empty();
|
||||||
|
}
|
||||||
|
childs.retain(|child| !matches!(child, Node::Empty));
|
||||||
|
}
|
||||||
|
Node::FileGroup { .. } => {}
|
||||||
|
Node::File { .. } => {}
|
||||||
|
Node::Empty => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn license(&self) -> Option<LicenseId> {
|
||||||
|
match self {
|
||||||
|
Node::Directory { childs, license: Some(license), .. } if childs.is_empty() => {
|
||||||
|
Some(*license)
|
||||||
|
}
|
||||||
|
Node::File { license, .. } => Some(*license),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn build(mut input: Vec<(PathBuf, LicenseId)>) -> Node<LicenseId> {
|
||||||
|
let mut childs = Vec::new();
|
||||||
|
|
||||||
|
// Ensure reproducibility of all future steps.
|
||||||
|
input.sort();
|
||||||
|
|
||||||
|
for (path, license) in input {
|
||||||
|
let mut node = Node::File { name: path.file_name().unwrap().into(), license };
|
||||||
|
for component in path.parent().unwrap_or_else(|| Path::new(".")).components().rev() {
|
||||||
|
node = Node::Directory {
|
||||||
|
name: component.as_os_str().into(),
|
||||||
|
childs: vec![node],
|
||||||
|
license: None,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
childs.push(node);
|
||||||
|
}
|
||||||
|
|
||||||
|
Node::Root { childs }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert a `Node<LicenseId>` into a `Node<&License>`, expanding all interned license IDs with a
|
||||||
|
/// reference to the actual license metadata.
|
||||||
|
pub(crate) fn expand_interned_licenses(
|
||||||
|
node: Node<LicenseId>,
|
||||||
|
interner: &LicensesInterner,
|
||||||
|
) -> Node<&License> {
|
||||||
|
match node {
|
||||||
|
Node::Root { childs } => Node::Root {
|
||||||
|
childs: childs.into_iter().map(|child| strip_interning(child, interner)).collect(),
|
||||||
|
},
|
||||||
|
Node::Directory { name, childs, license } => Node::Directory {
|
||||||
|
childs: childs.into_iter().map(|child| strip_interning(child, interner)).collect(),
|
||||||
|
license: license.map(|license| interner.resolve(license)),
|
||||||
|
name,
|
||||||
|
},
|
||||||
|
Node::File { name, license } => Node::File { name, license: interner.resolve(license) },
|
||||||
|
Node::FileGroup { names, license } => {
|
||||||
|
Node::FileGroup { names, license: interner.resolve(license) }
|
||||||
|
}
|
||||||
|
Node::Empty => Node::Empty,
|
||||||
|
}
|
||||||
|
}
|
49
src/tools/collect-license-metadata/src/reuse.rs
Normal file
49
src/tools/collect-license-metadata/src/reuse.rs
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
use crate::licenses::{License, LicenseId, LicensesInterner};
|
||||||
|
use anyhow::Error;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::process::{Command, Stdio};
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
|
pub(crate) fn collect(
|
||||||
|
reuse_exe: &Path,
|
||||||
|
interner: &mut LicensesInterner,
|
||||||
|
) -> Result<Vec<(PathBuf, LicenseId)>, Error> {
|
||||||
|
eprintln!("gathering license information from REUSE");
|
||||||
|
let start = Instant::now();
|
||||||
|
let raw = &obtain_spdx_document(reuse_exe)?;
|
||||||
|
eprintln!("finished gathering the license information from REUSE in {:.2?}", start.elapsed());
|
||||||
|
|
||||||
|
let document = spdx_rs::parsers::spdx_from_tag_value(&raw)?;
|
||||||
|
|
||||||
|
let mut result = Vec::new();
|
||||||
|
for file in document.file_information {
|
||||||
|
let license = interner.intern(License {
|
||||||
|
spdx: file.concluded_license.to_string(),
|
||||||
|
copyright: file.copyright_text.split('\n').map(|s| s.into()).collect(),
|
||||||
|
});
|
||||||
|
|
||||||
|
result.push((file.file_name.into(), license));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn obtain_spdx_document(reuse_exe: &Path) -> Result<String, Error> {
|
||||||
|
let output = Command::new(reuse_exe)
|
||||||
|
.args(&["spdx", "--add-license-concluded", "--creator-person=bors"])
|
||||||
|
.stdout(Stdio::piped())
|
||||||
|
.spawn()?
|
||||||
|
.wait_with_output()?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
eprintln!();
|
||||||
|
eprintln!("Note that Rust requires some REUSE features that might not be present in the");
|
||||||
|
eprintln!("release you're using. Make sure your REUSE release includes these PRs:");
|
||||||
|
eprintln!();
|
||||||
|
eprintln!(" - https://github.com/fsfe/reuse-tool/pull/623");
|
||||||
|
eprintln!();
|
||||||
|
anyhow::bail!("collecting licensing information with REUSE failed");
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(String::from_utf8(output.stdout)?)
|
||||||
|
}
|
11
src/tools/generate-copyright/Cargo.toml
Normal file
11
src/tools/generate-copyright/Cargo.toml
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
[package]
|
||||||
|
name = "generate-copyright"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1.0.65"
|
||||||
|
serde = { version = "1.0.147", features = ["derive"] }
|
||||||
|
serde_json = "1.0.85"
|
94
src/tools/generate-copyright/src/main.rs
Normal file
94
src/tools/generate-copyright/src/main.rs
Normal file
|
@ -0,0 +1,94 @@
|
||||||
|
use anyhow::Error;
|
||||||
|
use std::io::Write;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
fn main() -> Result<(), Error> {
|
||||||
|
let dest = env_path("DEST")?;
|
||||||
|
let license_metadata = env_path("LICENSE_METADATA")?;
|
||||||
|
|
||||||
|
let metadata: Metadata = serde_json::from_slice(&std::fs::read(&license_metadata)?)?;
|
||||||
|
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
render_recursive(&metadata.files, &mut buffer, 0)?;
|
||||||
|
|
||||||
|
std::fs::write(&dest, &buffer)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn render_recursive(node: &Node, buffer: &mut Vec<u8>, depth: usize) -> Result<(), Error> {
|
||||||
|
let prefix = std::iter::repeat("> ").take(depth + 1).collect::<String>();
|
||||||
|
|
||||||
|
match node {
|
||||||
|
Node::Root { childs } => {
|
||||||
|
for child in childs {
|
||||||
|
render_recursive(child, buffer, depth)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::Directory { name, childs, license } => {
|
||||||
|
render_license(&prefix, std::iter::once(name), license, buffer)?;
|
||||||
|
if !childs.is_empty() {
|
||||||
|
writeln!(buffer, "{prefix}")?;
|
||||||
|
writeln!(buffer, "{prefix}*Exceptions:*")?;
|
||||||
|
for child in childs {
|
||||||
|
writeln!(buffer, "{prefix}")?;
|
||||||
|
render_recursive(child, buffer, depth + 1)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::FileGroup { names, license } => {
|
||||||
|
render_license(&prefix, names.iter(), license, buffer)?;
|
||||||
|
}
|
||||||
|
Node::File { name, license } => {
|
||||||
|
render_license(&prefix, std::iter::once(name), license, buffer)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn render_license<'a>(
|
||||||
|
prefix: &str,
|
||||||
|
names: impl Iterator<Item = &'a String>,
|
||||||
|
license: &License,
|
||||||
|
buffer: &mut Vec<u8>,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
for name in names {
|
||||||
|
writeln!(buffer, "{prefix}**`{name}`** ")?;
|
||||||
|
}
|
||||||
|
writeln!(buffer, "{prefix}License: `{}` ", license.spdx)?;
|
||||||
|
for (i, copyright) in license.copyright.iter().enumerate() {
|
||||||
|
let suffix = if i == license.copyright.len() - 1 { "" } else { " " };
|
||||||
|
writeln!(buffer, "{prefix}Copyright: {copyright}{suffix}")?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(serde::Deserialize)]
|
||||||
|
struct Metadata {
|
||||||
|
files: Node,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(serde::Deserialize)]
|
||||||
|
#[serde(rename_all = "kebab-case", tag = "type")]
|
||||||
|
pub(crate) enum Node {
|
||||||
|
Root { childs: Vec<Node> },
|
||||||
|
Directory { name: String, childs: Vec<Node>, license: License },
|
||||||
|
File { name: String, license: License },
|
||||||
|
FileGroup { names: Vec<String>, license: License },
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Licensing metadata of a node, as read from the JSON metadata file.
#[derive(serde::Deserialize)]
|
||||||
|
struct License {
|
||||||
|
// SPDX expression; rendered on the "License:" line.
spdx: String,
|
||||||
|
// Copyright lines; each one is rendered on its own "Copyright:" line.
copyright: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn env_path(var: &str) -> Result<PathBuf, Error> {
|
||||||
|
if let Some(var) = std::env::var_os(var) {
|
||||||
|
Ok(var.into())
|
||||||
|
} else {
|
||||||
|
anyhow::bail!("missing environment variable {var}")
|
||||||
|
}
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue