add tool to collect license metadata from REUSE
This commit is contained in:
parent
17ee25d775
commit
13efb20846
10 changed files with 522 additions and 0 deletions
68
Cargo.lock
68
Cargo.lock
|
@ -563,6 +563,7 @@ dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
"num-integer",
|
"num-integer",
|
||||||
"num-traits",
|
"num-traits",
|
||||||
|
"serde",
|
||||||
"time",
|
"time",
|
||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
@ -712,6 +713,16 @@ dependencies = [
|
||||||
"rustc-semver",
|
"rustc-semver",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "collect-license-metadata"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"serde",
|
||||||
|
"serde_json",
|
||||||
|
"spdx-rs",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "color-eyre"
|
name = "color-eyre"
|
||||||
version = "0.6.2"
|
version = "0.6.2"
|
||||||
|
@ -4628,6 +4639,35 @@ dependencies = [
|
||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "spdx-expression"
|
||||||
|
version = "0.5.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "53d7ac03c67c572d85049d6db815e20a4a19b41b3d5cca732ac582342021ad77"
|
||||||
|
dependencies = [
|
||||||
|
"nom",
|
||||||
|
"serde",
|
||||||
|
"thiserror",
|
||||||
|
"tracing",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "spdx-rs"
|
||||||
|
version = "0.5.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b3c02f6eb7e7b4100c272f685a9ccaccaab302324e8c7ec3e2ee72340fb29ff3"
|
||||||
|
dependencies = [
|
||||||
|
"chrono",
|
||||||
|
"log",
|
||||||
|
"nom",
|
||||||
|
"serde",
|
||||||
|
"spdx-expression",
|
||||||
|
"strum",
|
||||||
|
"strum_macros",
|
||||||
|
"thiserror",
|
||||||
|
"uuid",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "stable_deref_trait"
|
name = "stable_deref_trait"
|
||||||
version = "1.2.0"
|
version = "1.2.0"
|
||||||
|
@ -4731,6 +4771,25 @@ version = "0.10.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "strum"
|
||||||
|
version = "0.24.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "strum_macros"
|
||||||
|
version = "0.24.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
|
||||||
|
dependencies = [
|
||||||
|
"heck",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"rustversion",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "syn"
|
name = "syn"
|
||||||
version = "1.0.102"
|
version = "1.0.102"
|
||||||
|
@ -5357,6 +5416,15 @@ version = "0.1.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8772a4ccbb4e89959023bc5b7cb8623a795caa7092d99f3aa9501b9484d4557d"
|
checksum = "8772a4ccbb4e89959023bc5b7cb8623a795caa7092d99f3aa9501b9484d4557d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "uuid"
|
||||||
|
version = "0.8.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7"
|
||||||
|
dependencies = [
|
||||||
|
"getrandom 0.2.0",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "valuable"
|
name = "valuable"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
|
|
@ -39,6 +39,7 @@ members = [
|
||||||
"src/tools/bump-stage0",
|
"src/tools/bump-stage0",
|
||||||
"src/tools/replace-version-placeholder",
|
"src/tools/replace-version-placeholder",
|
||||||
"src/tools/lld-wrapper",
|
"src/tools/lld-wrapper",
|
||||||
|
"src/tools/collect-license-metadata",
|
||||||
]
|
]
|
||||||
|
|
||||||
exclude = [
|
exclude = [
|
||||||
|
|
|
@ -753,6 +753,7 @@ impl<'a> Builder<'a> {
|
||||||
run::BumpStage0,
|
run::BumpStage0,
|
||||||
run::ReplaceVersionPlaceholder,
|
run::ReplaceVersionPlaceholder,
|
||||||
run::Miri,
|
run::Miri,
|
||||||
|
run::CollectLicenseMetadata,
|
||||||
),
|
),
|
||||||
// These commands either don't use paths, or they're special-cased in Build::build()
|
// These commands either don't use paths, or they're special-cased in Build::build()
|
||||||
Kind::Clean | Kind::Format | Kind::Setup => vec![],
|
Kind::Clean | Kind::Format | Kind::Setup => vec![],
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
use std::path::PathBuf;
|
||||||
use std::process::Command;
|
use std::process::Command;
|
||||||
|
|
||||||
use crate::builder::{Builder, RunConfig, ShouldRun, Step};
|
use crate::builder::{Builder, RunConfig, ShouldRun, Step};
|
||||||
|
@ -189,3 +190,35 @@ impl Step for Miri {
|
||||||
builder.run(&mut miri);
|
builder.run(&mut miri);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialOrd, Ord, Copy, Clone, Hash, PartialEq, Eq)]
|
||||||
|
pub struct CollectLicenseMetadata;
|
||||||
|
|
||||||
|
impl Step for CollectLicenseMetadata {
|
||||||
|
type Output = PathBuf;
|
||||||
|
const ONLY_HOSTS: bool = true;
|
||||||
|
|
||||||
|
fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
|
||||||
|
run.path("src/tools/collect-license-metadata")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn make_run(run: RunConfig<'_>) {
|
||||||
|
run.builder.ensure(CollectLicenseMetadata);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run(self, builder: &Builder<'_>) -> Self::Output {
|
||||||
|
let Some(reuse) = &builder.config.reuse else {
|
||||||
|
panic!("REUSE is required to collect the license metadata");
|
||||||
|
};
|
||||||
|
|
||||||
|
// Temporary location, it will be moved to src/etc once it's accurate.
|
||||||
|
let dest = builder.out.join("license-metadata.json");
|
||||||
|
|
||||||
|
let mut cmd = builder.tool_cmd(Tool::CollectLicenseMetadata);
|
||||||
|
cmd.env("REUSE_EXE", reuse);
|
||||||
|
cmd.env("DEST", &dest);
|
||||||
|
builder.run(&mut cmd);
|
||||||
|
|
||||||
|
dest
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -380,6 +380,7 @@ bootstrap_tool!(
|
||||||
HtmlChecker, "src/tools/html-checker", "html-checker";
|
HtmlChecker, "src/tools/html-checker", "html-checker";
|
||||||
BumpStage0, "src/tools/bump-stage0", "bump-stage0";
|
BumpStage0, "src/tools/bump-stage0", "bump-stage0";
|
||||||
ReplaceVersionPlaceholder, "src/tools/replace-version-placeholder", "replace-version-placeholder";
|
ReplaceVersionPlaceholder, "src/tools/replace-version-placeholder", "replace-version-placeholder";
|
||||||
|
CollectLicenseMetadata, "src/tools/collect-license-metadata", "collect-license-metadata";
|
||||||
);
|
);
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)]
|
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)]
|
||||||
|
|
10
src/tools/collect-license-metadata/Cargo.toml
Normal file
10
src/tools/collect-license-metadata/Cargo.toml
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
[package]
|
||||||
|
name = "collect-license-metadata"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1.0.65"
|
||||||
|
serde = { version = "1.0.147", features = ["derive"] }
|
||||||
|
serde_json = "1.0.85"
|
||||||
|
spdx-rs = "0.5.1"
|
37
src/tools/collect-license-metadata/src/licenses.rs
Normal file
37
src/tools/collect-license-metadata/src/licenses.rs
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
pub(crate) struct LicensesInterner {
|
||||||
|
by_id: Vec<License>,
|
||||||
|
by_struct: HashMap<License, usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LicensesInterner {
|
||||||
|
pub(crate) fn new() -> Self {
|
||||||
|
LicensesInterner { by_id: Vec::new(), by_struct: HashMap::new() }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn intern(&mut self, license: License) -> LicenseId {
|
||||||
|
if let Some(id) = self.by_struct.get(&license) {
|
||||||
|
LicenseId(*id)
|
||||||
|
} else {
|
||||||
|
let id = self.by_id.len();
|
||||||
|
self.by_id.push(license.clone());
|
||||||
|
self.by_struct.insert(license, id);
|
||||||
|
LicenseId(id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn resolve(&self, id: LicenseId) -> &License {
|
||||||
|
&self.by_id[id.0]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, serde::Serialize)]
|
||||||
|
#[serde(transparent)]
|
||||||
|
pub(crate) struct LicenseId(usize);
|
||||||
|
|
||||||
|
#[derive(Clone, Hash, PartialEq, Eq, serde::Serialize)]
|
||||||
|
pub(crate) struct License {
|
||||||
|
pub(crate) spdx: String,
|
||||||
|
pub(crate) copyright: Vec<String>,
|
||||||
|
}
|
30
src/tools/collect-license-metadata/src/main.rs
Normal file
30
src/tools/collect-license-metadata/src/main.rs
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
mod licenses;
|
||||||
|
mod path_tree;
|
||||||
|
mod reuse;
|
||||||
|
|
||||||
|
use crate::licenses::LicensesInterner;
|
||||||
|
use anyhow::Error;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
fn main() -> Result<(), Error> {
|
||||||
|
let reuse_exe: PathBuf = std::env::var_os("REUSE_EXE").expect("Missing REUSE_EXE").into();
|
||||||
|
let dest: PathBuf = std::env::var_os("DEST").expect("Missing DEST").into();
|
||||||
|
|
||||||
|
let mut interner = LicensesInterner::new();
|
||||||
|
let paths = crate::reuse::collect(&reuse_exe, &mut interner)?;
|
||||||
|
|
||||||
|
let mut tree = crate::path_tree::build(paths);
|
||||||
|
tree.simplify();
|
||||||
|
|
||||||
|
if let Some(parent) = dest.parent() {
|
||||||
|
std::fs::create_dir_all(parent)?;
|
||||||
|
}
|
||||||
|
std::fs::write(
|
||||||
|
&dest,
|
||||||
|
&serde_json::to_vec_pretty(&serde_json::json!({
|
||||||
|
"files": crate::path_tree::strip_interning(tree, &interner),
|
||||||
|
}))?,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
292
src/tools/collect-license-metadata/src/path_tree.rs
Normal file
292
src/tools/collect-license-metadata/src/path_tree.rs
Normal file
|
@ -0,0 +1,292 @@
|
||||||
|
//! Tools like REUSE output per-file licensing information, but we need to condense it in the
|
||||||
|
//! minimum amount of data that still represents the same licensing metadata. This module is
|
||||||
|
//! responsible for that, by turning the list of paths into a tree and executing simplification
|
||||||
|
//! passes over the tree to remove redundant information.
|
||||||
|
|
||||||
|
use crate::licenses::{License, LicenseId, LicensesInterner};
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
|
#[derive(serde::Serialize)]
|
||||||
|
#[serde(rename_all = "kebab-case", tag = "type")]
|
||||||
|
pub(crate) enum Node<L> {
|
||||||
|
Root { childs: Vec<Node<L>> },
|
||||||
|
Directory { name: PathBuf, childs: Vec<Node<L>>, license: Option<L> },
|
||||||
|
File { name: PathBuf, license: L },
|
||||||
|
FileGroup { names: Vec<PathBuf>, license: L },
|
||||||
|
Empty,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Node<LicenseId> {
|
||||||
|
pub(crate) fn simplify(&mut self) {
|
||||||
|
self.merge_directories();
|
||||||
|
self.collapse_in_licensed_directories();
|
||||||
|
self.merge_directory_licenses();
|
||||||
|
self.merge_file_groups();
|
||||||
|
self.remove_empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Initially, trees are built by the build() function with each file practically having a
|
||||||
|
/// separate directory tree, like so:
|
||||||
|
///
|
||||||
|
/// ```text
|
||||||
|
/// ┌─► ./ ──► compiler/ ──► rustc/ ──► src/ ──► main.rs
|
||||||
|
/// │
|
||||||
|
/// <root> ─┼─► ./ ──► compiler/ ──► rustc/ ──► Cargo.toml
|
||||||
|
/// │
|
||||||
|
/// └─► ./ ──► library/ ───► std/ ──► Cargo.toml
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// This pass is responsible for turning that into a proper directory tree:
|
||||||
|
///
|
||||||
|
/// ```text
|
||||||
|
/// ┌─► compiler/ ──► rustc/ ──┬─► src/ ──► main.rs
|
||||||
|
/// │ │
|
||||||
|
/// <root> ──► ./ ──┤ └─► Cargo.toml
|
||||||
|
/// │
|
||||||
|
/// └─► library/ ───► std/ ──► Cargo.toml
|
||||||
|
/// ```
|
||||||
|
fn merge_directories(&mut self) {
|
||||||
|
match self {
|
||||||
|
Node::Root { childs } | Node::Directory { childs, license: None, .. } => {
|
||||||
|
let mut directories = BTreeMap::new();
|
||||||
|
let mut files = Vec::new();
|
||||||
|
|
||||||
|
for child in childs.drain(..) {
|
||||||
|
match child {
|
||||||
|
Node::Directory { name, mut childs, license: None } => {
|
||||||
|
directories.entry(name).or_insert_with(Vec::new).append(&mut childs);
|
||||||
|
}
|
||||||
|
file @ Node::File { .. } => {
|
||||||
|
files.push(file);
|
||||||
|
}
|
||||||
|
Node::Empty => {}
|
||||||
|
Node::Root { .. } => {
|
||||||
|
panic!("can't have a root inside another element");
|
||||||
|
}
|
||||||
|
Node::FileGroup { .. } => {
|
||||||
|
panic!("FileGroup should not be present at this stage");
|
||||||
|
}
|
||||||
|
Node::Directory { license: Some(_), .. } => {
|
||||||
|
panic!("license should not be set at this stage");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
childs.extend(directories.into_iter().map(|(name, childs)| Node::Directory {
|
||||||
|
name,
|
||||||
|
childs,
|
||||||
|
license: None,
|
||||||
|
}));
|
||||||
|
childs.append(&mut files);
|
||||||
|
|
||||||
|
for child in &mut *childs {
|
||||||
|
child.merge_directories();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::Empty => {}
|
||||||
|
Node::File { .. } => {}
|
||||||
|
Node::FileGroup { .. } => {
|
||||||
|
panic!("FileGroup should not be present at this stage");
|
||||||
|
}
|
||||||
|
Node::Directory { license: Some(_), .. } => {
|
||||||
|
panic!("license should not be set at this stage");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// In our codebase, most files in a directory have the same license as the other files in that
|
||||||
|
/// same directory, so it's redundant to store licensing metadata for all the files. Instead,
|
||||||
|
/// we can add a license for a whole directory, and only record the exceptions to a directory
|
||||||
|
/// licensing metadata.
|
||||||
|
///
|
||||||
|
/// We cannot instead record only the difference to Rust's standard licensing, as the majority
|
||||||
|
/// of the files in our repository are *not* licensed under Rust's standard licensing due to
|
||||||
|
/// our inclusion of LLVM.
|
||||||
|
fn collapse_in_licensed_directories(&mut self) {
|
||||||
|
match self {
|
||||||
|
Node::Directory { childs, license, .. } => {
|
||||||
|
for child in &mut *childs {
|
||||||
|
child.collapse_in_licensed_directories();
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut licenses_count = BTreeMap::new();
|
||||||
|
for child in &*childs {
|
||||||
|
let Some(license) = child.license() else { continue };
|
||||||
|
*licenses_count.entry(license).or_insert(0) += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let most_popular_license = licenses_count
|
||||||
|
.into_iter()
|
||||||
|
.max_by_key(|(_, count)| *count)
|
||||||
|
.map(|(license, _)| license);
|
||||||
|
|
||||||
|
if let Some(most_popular_license) = most_popular_license {
|
||||||
|
childs.retain(|child| child.license() != Some(most_popular_license));
|
||||||
|
*license = Some(most_popular_license);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::Root { childs } => {
|
||||||
|
for child in &mut *childs {
|
||||||
|
child.collapse_in_licensed_directories();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::File { .. } => {}
|
||||||
|
Node::FileGroup { .. } => {}
|
||||||
|
Node::Empty => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reduce the depth of the tree by merging subdirectories with the same license as their
|
||||||
|
/// parent directory into their parent, and adjusting the paths of the childs accordingly.
|
||||||
|
fn merge_directory_licenses(&mut self) {
|
||||||
|
match self {
|
||||||
|
Node::Root { childs } => {
|
||||||
|
for child in &mut *childs {
|
||||||
|
child.merge_directory_licenses();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::Directory { childs, license, .. } => {
|
||||||
|
let mut to_add = Vec::new();
|
||||||
|
for child in &mut *childs {
|
||||||
|
child.merge_directory_licenses();
|
||||||
|
|
||||||
|
let Node::Directory {
|
||||||
|
name: child_name,
|
||||||
|
childs: child_childs,
|
||||||
|
license: child_license,
|
||||||
|
} = child else { continue };
|
||||||
|
|
||||||
|
if child_license != license {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
for mut child_child in child_childs.drain(..) {
|
||||||
|
match &mut child_child {
|
||||||
|
Node::Root { .. } => {
|
||||||
|
panic!("can't have a root inside another element");
|
||||||
|
}
|
||||||
|
Node::FileGroup { .. } => {
|
||||||
|
panic!("FileGroup should not be present at this stage");
|
||||||
|
}
|
||||||
|
Node::Directory { name: child_child_name, .. } => {
|
||||||
|
*child_child_name = child_name.join(&child_child_name);
|
||||||
|
}
|
||||||
|
Node::File { name: child_child_name, .. } => {
|
||||||
|
*child_child_name = child_name.join(&child_child_name);
|
||||||
|
}
|
||||||
|
Node::Empty => {}
|
||||||
|
}
|
||||||
|
to_add.push(child_child);
|
||||||
|
}
|
||||||
|
|
||||||
|
*child = Node::Empty;
|
||||||
|
}
|
||||||
|
childs.append(&mut to_add);
|
||||||
|
}
|
||||||
|
Node::Empty => {}
|
||||||
|
Node::File { .. } => {}
|
||||||
|
Node::FileGroup { .. } => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This pass groups multiple files in a directory with the same license into a single
|
||||||
|
/// "FileGroup", so that the license of all those files can be reported as a group.
|
||||||
|
///
|
||||||
|
/// Crucially this pass runs after collapse_in_licensed_directories, so the most common license
|
||||||
|
/// will already be marked as the directory's license and won't be turned into a group.
|
||||||
|
fn merge_file_groups(&mut self) {
|
||||||
|
match self {
|
||||||
|
Node::Root { childs } | Node::Directory { childs, .. } => {
|
||||||
|
let mut grouped = BTreeMap::new();
|
||||||
|
|
||||||
|
for child in &mut *childs {
|
||||||
|
child.merge_file_groups();
|
||||||
|
if let Node::File { name, license } = child {
|
||||||
|
grouped.entry(*license).or_insert_with(Vec::new).push(name.clone());
|
||||||
|
*child = Node::Empty;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (license, mut names) in grouped.into_iter() {
|
||||||
|
if names.len() == 1 {
|
||||||
|
childs.push(Node::File { license, name: names.pop().unwrap() });
|
||||||
|
} else {
|
||||||
|
childs.push(Node::FileGroup { license, names });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::File { .. } => {}
|
||||||
|
Node::FileGroup { .. } => panic!("FileGroup should not be present at this stage"),
|
||||||
|
Node::Empty => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Some nodes were replaced with Node::Empty to mark them for deletion. As the last step, make
|
||||||
|
/// sure to remove them from the tree.
|
||||||
|
fn remove_empty(&mut self) {
|
||||||
|
match self {
|
||||||
|
Node::Root { childs } | Node::Directory { childs, .. } => {
|
||||||
|
for child in &mut *childs {
|
||||||
|
child.remove_empty();
|
||||||
|
}
|
||||||
|
childs.retain(|child| !matches!(child, Node::Empty));
|
||||||
|
}
|
||||||
|
Node::FileGroup { .. } => {}
|
||||||
|
Node::File { .. } => {}
|
||||||
|
Node::Empty => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn license(&self) -> Option<LicenseId> {
|
||||||
|
match self {
|
||||||
|
Node::Directory { childs, license: Some(license), .. } if childs.is_empty() => {
|
||||||
|
Some(*license)
|
||||||
|
}
|
||||||
|
Node::File { license, .. } => Some(*license),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn build(mut input: Vec<(PathBuf, LicenseId)>) -> Node<LicenseId> {
|
||||||
|
let mut childs = Vec::new();
|
||||||
|
|
||||||
|
// Ensure reproducibility of all future steps.
|
||||||
|
input.sort();
|
||||||
|
|
||||||
|
for (path, license) in input {
|
||||||
|
let mut node = Node::File { name: path.file_name().unwrap().into(), license };
|
||||||
|
for component in path.parent().unwrap_or_else(|| Path::new(".")).components().rev() {
|
||||||
|
node = Node::Directory {
|
||||||
|
name: component.as_os_str().into(),
|
||||||
|
childs: vec![node],
|
||||||
|
license: None,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
childs.push(node);
|
||||||
|
}
|
||||||
|
|
||||||
|
Node::Root { childs }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn strip_interning(
|
||||||
|
node: Node<LicenseId>,
|
||||||
|
interner: &LicensesInterner,
|
||||||
|
) -> Node<&License> {
|
||||||
|
match node {
|
||||||
|
Node::Root { childs } => Node::Root {
|
||||||
|
childs: childs.into_iter().map(|child| strip_interning(child, interner)).collect(),
|
||||||
|
},
|
||||||
|
Node::Directory { name, childs, license } => Node::Directory {
|
||||||
|
childs: childs.into_iter().map(|child| strip_interning(child, interner)).collect(),
|
||||||
|
license: license.map(|license| interner.resolve(license)),
|
||||||
|
name,
|
||||||
|
},
|
||||||
|
Node::File { name, license } => Node::File { name, license: interner.resolve(license) },
|
||||||
|
Node::FileGroup { names, license } => {
|
||||||
|
Node::FileGroup { names, license: interner.resolve(license) }
|
||||||
|
}
|
||||||
|
Node::Empty => Node::Empty,
|
||||||
|
}
|
||||||
|
}
|
49
src/tools/collect-license-metadata/src/reuse.rs
Normal file
49
src/tools/collect-license-metadata/src/reuse.rs
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
use crate::licenses::{License, LicenseId, LicensesInterner};
|
||||||
|
use anyhow::Error;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::process::{Command, Stdio};
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
|
pub(crate) fn collect(
|
||||||
|
reuse_exe: &Path,
|
||||||
|
interner: &mut LicensesInterner,
|
||||||
|
) -> Result<Vec<(PathBuf, LicenseId)>, Error> {
|
||||||
|
eprintln!("gathering license information from REUSE");
|
||||||
|
let start = Instant::now();
|
||||||
|
let raw = &obtain_spdx_document(reuse_exe)?;
|
||||||
|
eprintln!("finished gathering the license information from REUSE in {:.2?}", start.elapsed());
|
||||||
|
|
||||||
|
let document = spdx_rs::parsers::spdx_from_tag_value(&raw)?;
|
||||||
|
|
||||||
|
let mut result = Vec::new();
|
||||||
|
for file in document.file_information {
|
||||||
|
let license = interner.intern(License {
|
||||||
|
spdx: file.concluded_license.to_string(),
|
||||||
|
copyright: file.copyright_text.split('\n').map(|s| s.into()).collect(),
|
||||||
|
});
|
||||||
|
|
||||||
|
result.push((file.file_name.into(), license));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn obtain_spdx_document(reuse_exe: &Path) -> Result<String, Error> {
|
||||||
|
let output = Command::new(reuse_exe)
|
||||||
|
.args(&["spdx", "--add-license-concluded", "--creator-person=bors"])
|
||||||
|
.stdout(Stdio::piped())
|
||||||
|
.spawn()?
|
||||||
|
.wait_with_output()?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
eprintln!();
|
||||||
|
eprintln!("Note that Rust requires some REUSE features that might not be present in the");
|
||||||
|
eprintln!("release you're using. Make sure your REUSE release includes these PRs:");
|
||||||
|
eprintln!();
|
||||||
|
eprintln!(" - https://github.com/fsfe/reuse-tool/pull/623");
|
||||||
|
eprintln!();
|
||||||
|
anyhow::bail!("collecting licensing information with REUSE failed");
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(String::from_utf8(output.stdout)?)
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue