diff --git a/src/bootstrap/bootstrap.py b/src/bootstrap/bootstrap.py index 6659894a171..5de7e6957c6 100644 --- a/src/bootstrap/bootstrap.py +++ b/src/bootstrap/bootstrap.py @@ -73,7 +73,8 @@ class RustBuild: if self.rustc().startswith(self.bin_root()) and \ (not os.path.exists(self.rustc()) or self.rustc_out_of_date()): - shutil.rmtree(self.bin_root()) + if os.path.exists(self.bin_root()): + shutil.rmtree(self.bin_root()) filename = "rust-std-nightly-" + self.build + ".tar.gz" url = "https://static.rust-lang.org/dist/" + self.snap_rustc_date() tarball = os.path.join(rustc_cache, filename) diff --git a/src/bootstrap/build/check.rs b/src/bootstrap/build/check.rs new file mode 100644 index 00000000000..19293e80217 --- /dev/null +++ b/src/bootstrap/build/check.rs @@ -0,0 +1,21 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use std::process::Command; + +use build::{Build, Compiler}; + +pub fn linkcheck(build: &Build, stage: u32, host: &str) { + println!("Linkcheck stage{} ({})", stage, host); + let compiler = Compiler::new(stage, host); + let linkchecker = build.tool(&compiler, "linkchecker"); + build.run(Command::new(&linkchecker) + .arg(build.out.join(host).join("doc"))); +} diff --git a/src/bootstrap/build/mod.rs b/src/bootstrap/build/mod.rs index 825cca6563c..9f24fba5843 100644 --- a/src/bootstrap/build/mod.rs +++ b/src/bootstrap/build/mod.rs @@ -30,6 +30,7 @@ macro_rules! 
t { mod cc; mod channel; +mod check; mod clean; mod compile; mod config; @@ -171,6 +172,9 @@ impl Build { Rustc { stage } => { compile::assemble_rustc(self, stage, target.target); } + ToolLinkchecker { stage } => { + compile::tool(self, stage, target.target, "linkchecker"); + } ToolRustbook { stage } => { compile::tool(self, stage, target.target, "rustbook"); } @@ -195,6 +199,10 @@ impl Build { doc::rustc(self, stage, target.target, &doc_out); } + CheckLinkcheck { stage } => { + check::linkcheck(self, stage, target.target); + } + Doc { .. } | // pseudo-steps Check { .. } => {} } diff --git a/src/bootstrap/build/step.rs b/src/bootstrap/build/step.rs index 23c678df9ac..7921edcff55 100644 --- a/src/bootstrap/build/step.rs +++ b/src/bootstrap/build/step.rs @@ -46,6 +46,7 @@ macro_rules! targets { }), // Various tools that we can build as part of the build. + (tool_linkchecker, ToolLinkchecker { stage: u32 }), (tool_rustbook, ToolRustbook { stage: u32 }), // Steps for long-running native builds. Ideally these wouldn't @@ -71,6 +72,7 @@ macro_rules! targets { // Steps for running tests. The 'check' target is just a pseudo // target to depend on a bunch of others. 
(check, Check { stage: u32, compiler: Compiler<'a> }), + (check_linkcheck, CheckLinkcheck { stage: u32 }), } } } @@ -200,6 +202,8 @@ fn add_steps<'a>(build: &'a Build, } targets!(add_step); + + panic!("unknown step: {}", step); } } @@ -273,7 +277,15 @@ impl<'a> Step<'a> { self.doc_std(stage)] } Source::Check { stage, compiler: _ } => { - vec![] + vec![self.check_linkcheck(stage)] + } + Source::CheckLinkcheck { stage } => { + vec![self.tool_linkchecker(stage), self.doc(stage)] + } + + Source::ToolLinkchecker { stage } => { + vec![self.libstd(stage, self.compiler(stage))] + } Source::ToolRustbook { stage } => { vec![self.librustc(stage, self.compiler(stage))] } diff --git a/src/tools/linkchecker/Cargo.lock b/src/tools/linkchecker/Cargo.lock new file mode 100644 index 00000000000..8e94137d213 --- /dev/null +++ b/src/tools/linkchecker/Cargo.lock @@ -0,0 +1,64 @@ +[root] +name = "linkchecker" +version = "0.1.0" +dependencies = [ + "url 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "libc" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "matches" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "rand" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rustc-serialize" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "unicode-bidi" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "matches 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "unicode-normalization" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "url" +version = "0.5.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "matches 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-serialize 0.3.18 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-bidi 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-normalization 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "uuid 0.1.18 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "uuid" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-serialize 0.3.18 (registry+https://github.com/rust-lang/crates.io-index)", +] + diff --git a/src/tools/linkchecker/Cargo.toml b/src/tools/linkchecker/Cargo.toml new file mode 100644 index 00000000000..29fc78a65e9 --- /dev/null +++ b/src/tools/linkchecker/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "linkchecker" +version = "0.1.0" +authors = ["Alex Crichton "] + +[dependencies] +url = "0.5" + +[[bin]] +name = "linkchecker" +path = "main.rs" diff --git a/src/tools/linkchecker/main.rs b/src/tools/linkchecker/main.rs new file mode 100644 index 00000000000..e5e88081bc4 --- /dev/null +++ b/src/tools/linkchecker/main.rs @@ -0,0 +1,161 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Script to check the validity of `href` links in our HTML documentation. +//! +//! In the past we've been quite error prone to writing in broken links as most +//! of them are manually rather than automatically added. As files move over +//! time or apis change old links become stale or broken. 
The purpose of this +//! script is to check all relative links in our documentation to make sure they +//! actually point to a valid place. +//! +//! Currently this doesn't actually do any HTML parsing or anything fancy like +//! that, it just has a simple "regex" to search for `href` tags. These values +//! are then translated to file URLs if possible and then the destination is +//! asserted to exist. +//! +//! A few whitelisted exceptions are allowed as there are known bugs in rustdoc, +//! but this should catch the majority of "broken link" cases. + +extern crate url; + +use std::env; +use std::fs::File; +use std::io::prelude::*; +use std::path::Path; + +use url::{Url, UrlParser}; + +macro_rules! t { + ($e:expr) => (match $e { + Ok(e) => e, + Err(e) => panic!("{} failed with {}", stringify!($e), e), + }) +} + +fn main() { + let docs = env::args().nth(1).unwrap(); + let docs = env::current_dir().unwrap().join(docs); + let mut url = Url::from_file_path(&docs).unwrap(); + let mut errors = false; + walk(&docs, &docs, &mut url, &mut errors); + if errors { + panic!("found some broken links"); + } +} + +fn walk(root: &Path, dir: &Path, url: &mut Url, errors: &mut bool) { + for entry in t!(dir.read_dir()).map(|e| t!(e)) { + let path = entry.path(); + let kind = t!(entry.file_type()); + url.path_mut().unwrap().push(entry.file_name().into_string().unwrap()); + if kind.is_dir() { + walk(root, &path, url, errors); + } else { + check(root, &path, url, errors); + } + url.path_mut().unwrap().pop(); + } +} + +fn check(root: &Path, file: &Path, base: &Url, errors: &mut bool) { + // ignore js files as they are not prone to errors as the rest of the + // documentation is and they otherwise bring up false positives. + if file.extension().and_then(|s| s.to_str()) == Some("js") { + return + } + + let pretty_file = file.strip_prefix(root).unwrap_or(file); + + // Unfortunately we're not 100% full of valid links today so we need a few + // whitelists to get this past `make check` today. 
+ if let Some(path) = pretty_file.to_str() { + // FIXME(#32129) + if path == "std/string/struct.String.html" { + return + } + // FIXME(#32130) + if path.contains("btree_set/struct.BTreeSet.html") || + path == "collections/struct.BTreeSet.html" { + return + } + // FIXME(#31948) + if path.contains("ParseFloatError") { + return + } + + // currently + if path == "std/sys/ext/index.html" { + return + } + + // weird reexports, but this module is on its way out, so chalk it up to + // "rustdoc weirdness" and move on from there + if path.contains("scoped_tls") { + return + } + } + + let mut parser = UrlParser::new(); + parser.base_url(base); + let mut contents = String::new(); + if t!(File::open(file)).read_to_string(&mut contents).is_err() { + return + } + + for (i, mut line) in contents.lines().enumerate() { + // Search for anything that's the regex 'href[ ]*=[ ]*".*?"' + while let Some(j) = line.find(" href") { + let rest = &line[j + 5..]; + line = rest; + let pos_equals = match rest.find("=") { + Some(i) => i, + None => continue, + }; + if rest[..pos_equals].trim_left_matches(" ") != "" { + continue + } + let rest = &rest[pos_equals + 1..]; + let pos_quote = match rest.find("\"").or_else(|| rest.find("'")) { + Some(i) => i, + None => continue, + }; + if rest[..pos_quote].trim_left_matches(" ") != "" { + continue + } + let rest = &rest[pos_quote + 1..]; + let url = match rest.find("\"").or_else(|| rest.find("'")) { + Some(i) => &rest[..i], + None => continue, + }; + + // Once we've plucked out the URL, parse it using our base url and + // then try to extract a file path. If either of these fails then we + // just keep going. + let parsed_url = match parser.parse(url) { + Ok(url) => url, + Err(..) => continue, + }; + let path = match parsed_url.to_file_path() { + Ok(path) => path, + Err(..) => continue, + }; + + // Alright, if we've found a file name then this file had better + // exist! If it doesn't then we register and print an error. 
+ if !path.exists() { + *errors = true; + print!("{}:{}: broken link - ", pretty_file.display(), i + 1); + let pretty_path = path.strip_prefix(root).unwrap_or(&path); + println!("{}", pretty_path.display()); + } + } + } +}