Rollup merge of #138465 - klensy:linkchecker-b, r=Mark-Simulacrum

linkchecker: bump html5ever

Bumping html5ever to 0.28 required a small refactoring; see https://github.com/servo/html5ever/pull/548
This commit is contained in:
许杰友 Jieyou Xu (Joe) 2025-03-16 09:40:08 +08:00 committed by GitHub
commit e42f33970c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 42 additions and 59 deletions

View file

@ -1548,16 +1548,14 @@ dependencies = [
[[package]]
name = "html5ever"
version = "0.27.0"
version = "0.29.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4"
checksum = "9b958f80f0fde8601dc6c08685adc743eecaa046181cebd5a57551468dfc2ddc"
dependencies = [
"log",
"mac",
"markup5ever",
"proc-macro2",
"quote",
"syn 2.0.96",
"match_token",
]
[[package]]
@ -2023,7 +2021,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34"
dependencies = [
"cfg-if",
"windows-targets 0.52.6",
"windows-targets 0.48.5",
]
[[package]]
@ -2134,9 +2132,9 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "markup5ever"
version = "0.12.1"
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45"
checksum = "03a7b81dfb91586d0677086d40a6d755070e0799b71bb897485bac408dfd5c69"
dependencies = [
"log",
"phf",
@ -2146,6 +2144,17 @@ dependencies = [
"tendril",
]
[[package]]
name = "match_token"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.96",
]
[[package]]
name = "matchers"
version = "0.1.0"
@ -2628,7 +2637,7 @@ version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
dependencies = [
"phf_shared 0.11.3",
"phf_shared",
]
[[package]]
@ -2637,18 +2646,8 @@ version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
dependencies = [
"phf_generator 0.11.3",
"phf_shared 0.11.3",
]
[[package]]
name = "phf_generator"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
dependencies = [
"phf_shared 0.10.0",
"rand 0.8.5",
"phf_generator",
"phf_shared",
]
[[package]]
@ -2657,26 +2656,17 @@ version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
dependencies = [
"phf_shared 0.11.3",
"phf_shared",
"rand 0.8.5",
]
[[package]]
name = "phf_shared"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
dependencies = [
"siphasher 0.3.11",
]
[[package]]
name = "phf_shared"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
dependencies = [
"siphasher 1.0.1",
"siphasher",
]
[[package]]
@ -4861,12 +4851,6 @@ version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1de1d4f81173b03af4c0cbed3c898f6bff5b870e4a7f5d6f4057d62a7a4b686e"
[[package]]
name = "siphasher"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
[[package]]
name = "siphasher"
version = "1.0.1"
@ -4981,26 +4965,25 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "string_cache"
version = "0.8.7"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b"
checksum = "938d512196766101d333398efde81bc1f37b00cb42c2f8350e5df639f040bbbe"
dependencies = [
"new_debug_unreachable",
"once_cell",
"parking_lot",
"phf_shared 0.10.0",
"phf_shared",
"precomputed-hash",
"serde",
]
[[package]]
name = "string_cache_codegen"
version = "0.5.2"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988"
checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0"
dependencies = [
"phf_generator 0.10.0",
"phf_shared 0.10.0",
"phf_generator",
"phf_shared",
"proc-macro2",
"quote",
]

View file

@ -9,4 +9,4 @@ path = "main.rs"
[dependencies]
regex = "1"
html5ever = "0.27.0"
html5ever = "0.29.0"

View file

@ -16,7 +16,7 @@
//! A few exceptions are allowed as there are known bugs in rustdoc, but this
//! should catch the majority of "broken link" cases.
use std::cell::RefCell;
use std::cell::{Cell, RefCell};
use std::collections::{HashMap, HashSet};
use std::io::ErrorKind;
use std::path::{Component, Path, PathBuf};
@ -544,7 +544,7 @@ fn parse_html<Sink: TokenSink>(source: &str, sink: Sink) -> Sink {
let mut input = BufferQueue::default();
input.push_back(tendril.try_reinterpret().unwrap());
let mut tok = Tokenizer::new(sink, TokenizerOpts::default());
let tok = Tokenizer::new(sink, TokenizerOpts::default());
let _ = tok.feed(&mut input);
assert!(input.is_empty());
tok.end();
@ -554,8 +554,8 @@ fn parse_html<Sink: TokenSink>(source: &str, sink: Sink) -> Sink {
#[derive(Default)]
struct AttrCollector {
attr_name: &'static [u8],
base: Option<String>,
found_attrs: Vec<(u64, String)>,
base: Cell<Option<String>>,
found_attrs: RefCell<Vec<(u64, String)>>,
/// Tracks whether or not it is inside a <script> tag.
///
/// A lot of our sources have JSON script tags which have HTML embedded
@ -564,13 +564,13 @@ struct AttrCollector {
/// `TokenSinkResult::Script(…)` (and then maybe switch parser?), but I
/// don't fully understand the best way to use that, and this seems good
/// enough for now.
in_script: bool,
in_script: Cell<bool>,
}
impl TokenSink for AttrCollector {
type Handle = ();
fn process_token(&mut self, token: Token, line_number: u64) -> TokenSinkResult<()> {
fn process_token(&self, token: Token, line_number: u64) -> TokenSinkResult<()> {
match token {
TagToken(tag) => {
let tag_name = tag.name.as_bytes();
@ -578,20 +578,20 @@ impl TokenSink for AttrCollector {
if let Some(href) =
tag.attrs.iter().find(|attr| attr.name.local.as_bytes() == b"href")
{
self.base = Some(href.value.to_string());
self.base.set(Some(href.value.to_string()));
}
return TokenSinkResult::Continue;
} else if tag_name == b"script" {
self.in_script = !self.in_script;
self.in_script.set(!self.in_script.get());
}
if self.in_script {
if self.in_script.get() {
return TokenSinkResult::Continue;
}
for attr in tag.attrs.iter() {
let name = attr.name.local.as_bytes();
if name == self.attr_name {
let url = attr.value.to_string();
self.found_attrs.push((line_number, url));
self.found_attrs.borrow_mut().push((line_number, url));
}
}
}
@ -607,7 +607,7 @@ impl TokenSink for AttrCollector {
fn get_urls(source: &str) -> (Option<String>, Vec<(u64, String)>) {
let collector = AttrCollector { attr_name: b"href", ..AttrCollector::default() };
let sink = parse_html(source, collector);
(sink.base, sink.found_attrs)
(sink.base.into_inner(), sink.found_attrs.into_inner())
}
/// Retrieves id="..." attributes from HTML elements.
@ -619,7 +619,7 @@ fn parse_ids(ids: &mut HashSet<String>, file: &str, source: &str, report: &mut R
let collector = AttrCollector { attr_name: b"id", ..AttrCollector::default() };
let sink = parse_html(source, collector);
for (line_number, id) in sink.found_attrs {
for (line_number, id) in sink.found_attrs.into_inner() {
let encoded = small_url_encode(&id);
if let Some(id) = ids.replace(id) {
report.errors += 1;