From c3954b358f9e2be96d0dd553a8efaefc49f0bba7 Mon Sep 17 00:00:00 2001 From: r0cky Date: Sun, 3 Mar 2024 00:57:37 +0800 Subject: [PATCH] Add a tidy check that checks whether the fluent slugs only appear once --- src/tools/tidy/src/fluent_alphabetical.rs | 58 ++++++++++++++++++++--- src/tools/tidy/src/fluent_used.rs | 43 +++++++++++++++++ src/tools/tidy/src/lib.rs | 1 + 3 files changed, 96 insertions(+), 6 deletions(-) create mode 100644 src/tools/tidy/src/fluent_used.rs diff --git a/src/tools/tidy/src/fluent_alphabetical.rs b/src/tools/tidy/src/fluent_alphabetical.rs index 67b745373f0..9803b6eab2d 100644 --- a/src/tools/tidy/src/fluent_alphabetical.rs +++ b/src/tools/tidy/src/fluent_alphabetical.rs @@ -1,6 +1,7 @@ //! Checks that all Flunt files have messages in alphabetical order use crate::walk::{filter_dirs, walk}; +use std::collections::HashMap; use std::{fs::OpenOptions, io::Write, path::Path}; use regex::Regex; @@ -13,11 +14,27 @@ fn filter_fluent(path: &Path) -> bool { if let Some(ext) = path.extension() { ext.to_str() != Some("ftl") } else { true } } -fn check_alphabetic(filename: &str, fluent: &str, bad: &mut bool) { +fn check_alphabetic( + filename: &str, + fluent: &str, + bad: &mut bool, + all_defined_msgs: &mut HashMap, +) { let mut matches = MESSAGE.captures_iter(fluent).peekable(); while let Some(m) = matches.next() { + let name = m.get(1).unwrap(); + if let Some(defined_filename) = all_defined_msgs.get(name.as_str()) { + tidy_error!( + bad, + "{filename}: message `{}` is already defined in {}", + name.as_str(), + defined_filename, + ); + } + + all_defined_msgs.insert(name.as_str().to_owned(), filename.to_owned()); + if let Some(next) = matches.peek() { - let name = m.get(1).unwrap(); let next = next.get(1).unwrap(); if name.as_str() > next.as_str() { tidy_error!( @@ -34,13 +51,29 @@ run `./x.py test tidy --bless` to sort the file correctly", } } -fn sort_messages(fluent: &str) -> String { +fn sort_messages( + filename: &str, + fluent: &str, + bad: &mut bool, + all_defined_msgs: &mut HashMap, +) -> String { let mut chunks = vec![]; let mut cur = String::new(); for line in fluent.lines() { - if MESSAGE.is_match(line) { + if let Some(name) = MESSAGE.find(line) { + if let Some(defined_filename) = all_defined_msgs.get(name.as_str()) { + tidy_error!( + bad, + "{filename}: message `{}` is already defined in {}", + name.as_str(), + defined_filename, + ); + } + + all_defined_msgs.insert(name.as_str().to_owned(), filename.to_owned()); chunks.push(std::mem::take(&mut cur)); } + cur += line; cur.push('\n'); } @@ -53,20 +86,33 @@ fn sort_messages(fluent: &str) -> String { } pub fn check(path: &Path, bless: bool, bad: &mut bool) { + let mut all_defined_msgs = HashMap::new(); walk( path, |path, is_dir| filter_dirs(path) || (!is_dir && filter_fluent(path)), &mut |ent, contents| { if bless { - let sorted = sort_messages(contents); + let sorted = sort_messages( + ent.path().to_str().unwrap(), + contents, + bad, + &mut all_defined_msgs, + ); if sorted != contents { let mut f = OpenOptions::new().write(true).truncate(true).open(ent.path()).unwrap(); f.write(sorted.as_bytes()).unwrap(); } } else { - check_alphabetic(ent.path().to_str().unwrap(), contents, bad); + check_alphabetic( + ent.path().to_str().unwrap(), + contents, + bad, + &mut all_defined_msgs, + ); } }, ); + + crate::fluent_used::check(path, all_defined_msgs, bad); } diff --git a/src/tools/tidy/src/fluent_used.rs b/src/tools/tidy/src/fluent_used.rs new file mode 100644 index 00000000000..b73e79cb38d --- /dev/null +++ b/src/tools/tidy/src/fluent_used.rs @@ -0,0 +1,43 @@ +//! Checks that all Fluent messages appear at least twice + +use crate::walk::{filter_dirs, walk}; +use regex::Regex; +use std::collections::HashMap; +use std::path::Path; + +lazy_static::lazy_static! { + static ref WORD: Regex = Regex::new(r"\w+").unwrap(); +} + +fn filter_used_messages( + contents: &str, + msgs_not_appeared_yet: &mut HashMap, + msgs_appeared_only_once: &mut HashMap, +) { + // we don't just check messages never appear in Rust files, + // because messages can be used as parts of other fluent messages in Fluent files, + // so we do checking messages appear only once in all Rust and Fluent files. + let mut matches = WORD.find_iter(contents); + while let Some(name) = matches.next() { + if let Some((name, filename)) = msgs_not_appeared_yet.remove_entry(name.as_str()) { + // if one msg appears for the first time, + // remove it from `msgs_not_appeared_yet` and insert it into `msgs_appeared_only_once`. + msgs_appeared_only_once.insert(name, filename); + } else { + // if one msg appears for the second time, + // remove it from `msgs_appeared_only_once`. + msgs_appeared_only_once.remove(name.as_str()); + } + } +} + +pub fn check(path: &Path, mut all_defined_msgs: HashMap, bad: &mut bool) { + let mut msgs_appear_only_once = HashMap::new(); + walk(path, |path, _| filter_dirs(path), &mut |_, contents| { + filter_used_messages(contents, &mut all_defined_msgs, &mut msgs_appear_only_once); + }); + + for (name, filename) in msgs_appear_only_once { + tidy_error!(bad, "{filename}: message `{}` is not used", name,); + } +} diff --git a/src/tools/tidy/src/lib.rs b/src/tools/tidy/src/lib.rs index 6f3ade0ab58..670b7eb2be9 100644 --- a/src/tools/tidy/src/lib.rs +++ b/src/tools/tidy/src/lib.rs @@ -65,6 +65,7 @@ pub mod ext_tool_checks; pub mod extdeps; pub mod features; pub mod fluent_alphabetical; +mod fluent_used; pub(crate) mod iter_header; pub mod mir_opt_tests; pub mod pal;