1
Fork 0

Rollup merge of #138465 - klensy:linkchecker-b, r=Mark-Simulacrum

linkchecker: bump html5ever

Bumping html5ever to 0.28 required a small refactoring; see https://github.com/servo/html5ever/pull/548
This commit is contained in:
许杰友 Jieyou Xu (Joe) 2025-03-16 09:40:08 +08:00 committed by GitHub
commit e42f33970c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 42 additions and 59 deletions

View file

@ -1548,16 +1548,14 @@ dependencies = [
[[package]] [[package]]
name = "html5ever" name = "html5ever"
version = "0.27.0" version = "0.29.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4" checksum = "9b958f80f0fde8601dc6c08685adc743eecaa046181cebd5a57551468dfc2ddc"
dependencies = [ dependencies = [
"log", "log",
"mac", "mac",
"markup5ever", "markup5ever",
"proc-macro2", "match_token",
"quote",
"syn 2.0.96",
] ]
[[package]] [[package]]
@ -2023,7 +2021,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"windows-targets 0.52.6", "windows-targets 0.48.5",
] ]
[[package]] [[package]]
@ -2134,9 +2132,9 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]] [[package]]
name = "markup5ever" name = "markup5ever"
version = "0.12.1" version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45" checksum = "03a7b81dfb91586d0677086d40a6d755070e0799b71bb897485bac408dfd5c69"
dependencies = [ dependencies = [
"log", "log",
"phf", "phf",
@ -2146,6 +2144,17 @@ dependencies = [
"tendril", "tendril",
] ]
[[package]]
name = "match_token"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.96",
]
[[package]] [[package]]
name = "matchers" name = "matchers"
version = "0.1.0" version = "0.1.0"
@ -2628,7 +2637,7 @@ version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
dependencies = [ dependencies = [
"phf_shared 0.11.3", "phf_shared",
] ]
[[package]] [[package]]
@ -2637,18 +2646,8 @@ version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
dependencies = [ dependencies = [
"phf_generator 0.11.3", "phf_generator",
"phf_shared 0.11.3", "phf_shared",
]
[[package]]
name = "phf_generator"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
dependencies = [
"phf_shared 0.10.0",
"rand 0.8.5",
] ]
[[package]] [[package]]
@ -2657,26 +2656,17 @@ version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
dependencies = [ dependencies = [
"phf_shared 0.11.3", "phf_shared",
"rand 0.8.5", "rand 0.8.5",
] ]
[[package]]
name = "phf_shared"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
dependencies = [
"siphasher 0.3.11",
]
[[package]] [[package]]
name = "phf_shared" name = "phf_shared"
version = "0.11.3" version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
dependencies = [ dependencies = [
"siphasher 1.0.1", "siphasher",
] ]
[[package]] [[package]]
@ -4861,12 +4851,6 @@ version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1de1d4f81173b03af4c0cbed3c898f6bff5b870e4a7f5d6f4057d62a7a4b686e" checksum = "1de1d4f81173b03af4c0cbed3c898f6bff5b870e4a7f5d6f4057d62a7a4b686e"
[[package]]
name = "siphasher"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
[[package]] [[package]]
name = "siphasher" name = "siphasher"
version = "1.0.1" version = "1.0.1"
@ -4981,26 +4965,25 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]] [[package]]
name = "string_cache" name = "string_cache"
version = "0.8.7" version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" checksum = "938d512196766101d333398efde81bc1f37b00cb42c2f8350e5df639f040bbbe"
dependencies = [ dependencies = [
"new_debug_unreachable", "new_debug_unreachable",
"once_cell",
"parking_lot", "parking_lot",
"phf_shared 0.10.0", "phf_shared",
"precomputed-hash", "precomputed-hash",
"serde", "serde",
] ]
[[package]] [[package]]
name = "string_cache_codegen" name = "string_cache_codegen"
version = "0.5.2" version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988" checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0"
dependencies = [ dependencies = [
"phf_generator 0.10.0", "phf_generator",
"phf_shared 0.10.0", "phf_shared",
"proc-macro2", "proc-macro2",
"quote", "quote",
] ]

View file

@ -9,4 +9,4 @@ path = "main.rs"
[dependencies] [dependencies]
regex = "1" regex = "1"
html5ever = "0.27.0" html5ever = "0.29.0"

View file

@ -16,7 +16,7 @@
//! A few exceptions are allowed as there's known bugs in rustdoc, but this //! A few exceptions are allowed as there's known bugs in rustdoc, but this
//! should catch the majority of "broken link" cases. //! should catch the majority of "broken link" cases.
use std::cell::RefCell; use std::cell::{Cell, RefCell};
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::io::ErrorKind; use std::io::ErrorKind;
use std::path::{Component, Path, PathBuf}; use std::path::{Component, Path, PathBuf};
@ -544,7 +544,7 @@ fn parse_html<Sink: TokenSink>(source: &str, sink: Sink) -> Sink {
let mut input = BufferQueue::default(); let mut input = BufferQueue::default();
input.push_back(tendril.try_reinterpret().unwrap()); input.push_back(tendril.try_reinterpret().unwrap());
let mut tok = Tokenizer::new(sink, TokenizerOpts::default()); let tok = Tokenizer::new(sink, TokenizerOpts::default());
let _ = tok.feed(&mut input); let _ = tok.feed(&mut input);
assert!(input.is_empty()); assert!(input.is_empty());
tok.end(); tok.end();
@ -554,8 +554,8 @@ fn parse_html<Sink: TokenSink>(source: &str, sink: Sink) -> Sink {
#[derive(Default)] #[derive(Default)]
struct AttrCollector { struct AttrCollector {
attr_name: &'static [u8], attr_name: &'static [u8],
base: Option<String>, base: Cell<Option<String>>,
found_attrs: Vec<(u64, String)>, found_attrs: RefCell<Vec<(u64, String)>>,
/// Tracks whether or not it is inside a <script> tag. /// Tracks whether or not it is inside a <script> tag.
/// ///
/// A lot of our sources have JSON script tags which have HTML embedded /// A lot of our sources have JSON script tags which have HTML embedded
@ -564,13 +564,13 @@ struct AttrCollector {
/// `TokenSinkResult::Script(…)` (and then maybe switch parser?), but I /// `TokenSinkResult::Script(…)` (and then maybe switch parser?), but I
/// don't fully understand the best way to use that, and this seems good /// don't fully understand the best way to use that, and this seems good
/// enough for now. /// enough for now.
in_script: bool, in_script: Cell<bool>,
} }
impl TokenSink for AttrCollector { impl TokenSink for AttrCollector {
type Handle = (); type Handle = ();
fn process_token(&mut self, token: Token, line_number: u64) -> TokenSinkResult<()> { fn process_token(&self, token: Token, line_number: u64) -> TokenSinkResult<()> {
match token { match token {
TagToken(tag) => { TagToken(tag) => {
let tag_name = tag.name.as_bytes(); let tag_name = tag.name.as_bytes();
@ -578,20 +578,20 @@ impl TokenSink for AttrCollector {
if let Some(href) = if let Some(href) =
tag.attrs.iter().find(|attr| attr.name.local.as_bytes() == b"href") tag.attrs.iter().find(|attr| attr.name.local.as_bytes() == b"href")
{ {
self.base = Some(href.value.to_string()); self.base.set(Some(href.value.to_string()));
} }
return TokenSinkResult::Continue; return TokenSinkResult::Continue;
} else if tag_name == b"script" { } else if tag_name == b"script" {
self.in_script = !self.in_script; self.in_script.set(!self.in_script.get());
} }
if self.in_script { if self.in_script.get() {
return TokenSinkResult::Continue; return TokenSinkResult::Continue;
} }
for attr in tag.attrs.iter() { for attr in tag.attrs.iter() {
let name = attr.name.local.as_bytes(); let name = attr.name.local.as_bytes();
if name == self.attr_name { if name == self.attr_name {
let url = attr.value.to_string(); let url = attr.value.to_string();
self.found_attrs.push((line_number, url)); self.found_attrs.borrow_mut().push((line_number, url));
} }
} }
} }
@ -607,7 +607,7 @@ impl TokenSink for AttrCollector {
fn get_urls(source: &str) -> (Option<String>, Vec<(u64, String)>) { fn get_urls(source: &str) -> (Option<String>, Vec<(u64, String)>) {
let collector = AttrCollector { attr_name: b"href", ..AttrCollector::default() }; let collector = AttrCollector { attr_name: b"href", ..AttrCollector::default() };
let sink = parse_html(source, collector); let sink = parse_html(source, collector);
(sink.base, sink.found_attrs) (sink.base.into_inner(), sink.found_attrs.into_inner())
} }
/// Retrieves id="..." attributes from HTML elements. /// Retrieves id="..." attributes from HTML elements.
@ -619,7 +619,7 @@ fn parse_ids(ids: &mut HashSet<String>, file: &str, source: &str, report: &mut R
let collector = AttrCollector { attr_name: b"id", ..AttrCollector::default() }; let collector = AttrCollector { attr_name: b"id", ..AttrCollector::default() };
let sink = parse_html(source, collector); let sink = parse_html(source, collector);
for (line_number, id) in sink.found_attrs { for (line_number, id) in sink.found_attrs.into_inner() {
let encoded = small_url_encode(&id); let encoded = small_url_encode(&id);
if let Some(id) = ids.replace(id) { if let Some(id) = ids.replace(id) {
report.errors += 1; report.errors += 1;