Add invalid_from_utf8 analogous to invalid_from_utf8_unchecked
This commit is contained in:
parent
a0612d90b0
commit
7f99c7d3e6
7 changed files with 169 additions and 21 deletions
|
@ -304,6 +304,10 @@ lint_improper_ctypes_union_layout_help = consider adding a `#[repr(C)]` or `#[re
|
||||||
lint_improper_ctypes_union_layout_reason = this union has unspecified layout
|
lint_improper_ctypes_union_layout_reason = this union has unspecified layout
|
||||||
lint_improper_ctypes_union_non_exhaustive = this union is non-exhaustive
|
lint_improper_ctypes_union_non_exhaustive = this union is non-exhaustive
|
||||||
|
|
||||||
|
# FIXME: we should ordinalize $valid_up_to when we add support for doing so
|
||||||
|
lint_invalid_from_utf8_checked = calls to `{$method}` with a invalid literal always return an error
|
||||||
|
.label = the literal was valid UTF-8 up to the {$valid_up_to} bytes
|
||||||
|
|
||||||
# FIXME: we should ordinalize $valid_up_to when we add support for doing so
|
# FIXME: we should ordinalize $valid_up_to when we add support for doing so
|
||||||
lint_invalid_from_utf8_unchecked = calls to `{$method}` with a invalid literal are undefined behavior
|
lint_invalid_from_utf8_unchecked = calls to `{$method}` with a invalid literal are undefined behavior
|
||||||
.label = the literal was valid UTF-8 up to the {$valid_up_to} bytes
|
.label = the literal was valid UTF-8 up to the {$valid_up_to} bytes
|
||||||
|
|
|
@ -5,7 +5,7 @@ use rustc_hir::{Expr, ExprKind};
|
||||||
use rustc_span::source_map::Spanned;
|
use rustc_span::source_map::Spanned;
|
||||||
use rustc_span::sym;
|
use rustc_span::sym;
|
||||||
|
|
||||||
use crate::lints::InvalidFromUtf8UncheckedDiag;
|
use crate::lints::InvalidFromUtf8Diag;
|
||||||
use crate::{LateContext, LateLintPass, LintContext};
|
use crate::{LateContext, LateLintPass, LintContext};
|
||||||
|
|
||||||
declare_lint! {
|
declare_lint! {
|
||||||
|
@ -33,7 +33,30 @@ declare_lint! {
|
||||||
"using a non UTF-8 literal in `std::str::from_utf8_unchecked`"
|
"using a non UTF-8 literal in `std::str::from_utf8_unchecked`"
|
||||||
}
|
}
|
||||||
|
|
||||||
declare_lint_pass!(InvalidFromUtf8 => [INVALID_FROM_UTF8_UNCHECKED]);
|
declare_lint! {
|
||||||
|
/// The `invalid_from_utf8` lint checks for calls to
|
||||||
|
/// `std::str::from_utf8` and `std::str::from_utf8_mut`
|
||||||
|
/// with an invalid UTF-8 literal.
|
||||||
|
///
|
||||||
|
/// ### Example
|
||||||
|
///
|
||||||
|
/// ```rust
|
||||||
|
/// # #[allow(unused)]
|
||||||
|
/// std::str::from_utf8(b"Ru\x82st");
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// {{produces}}
|
||||||
|
///
|
||||||
|
/// ### Explanation
|
||||||
|
///
|
||||||
|
/// Calling these functions with such a literal always returns an error, as documented
|
||||||
|
/// for `std::str::from_utf8` and `std::str::from_utf8_mut`.
|
||||||
|
pub INVALID_FROM_UTF8,
|
||||||
|
Warn,
|
||||||
|
"using a non UTF-8 literal in `std::str::from_utf8`"
|
||||||
|
}
|
||||||
|
|
||||||
|
declare_lint_pass!(InvalidFromUtf8 => [INVALID_FROM_UTF8_UNCHECKED, INVALID_FROM_UTF8]);
|
||||||
|
|
||||||
impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
|
impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
|
||||||
fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) {
|
fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) {
|
||||||
|
@ -41,15 +64,25 @@ impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
|
||||||
&& let ExprKind::Path(ref qpath) = path.kind
|
&& let ExprKind::Path(ref qpath) = path.kind
|
||||||
&& let Some(def_id) = cx.qpath_res(qpath, path.hir_id).opt_def_id()
|
&& let Some(def_id) = cx.qpath_res(qpath, path.hir_id).opt_def_id()
|
||||||
&& let Some(diag_item) = cx.tcx.get_diagnostic_name(def_id)
|
&& let Some(diag_item) = cx.tcx.get_diagnostic_name(def_id)
|
||||||
&& [sym::str_from_utf8_unchecked, sym::str_from_utf8_unchecked_mut].contains(&diag_item)
|
&& [sym::str_from_utf8, sym::str_from_utf8_mut,
|
||||||
|
sym::str_from_utf8_unchecked, sym::str_from_utf8_unchecked_mut].contains(&diag_item)
|
||||||
{
|
{
|
||||||
let lint = |utf8_error: Utf8Error| {
|
let lint = |utf8_error: Utf8Error| {
|
||||||
|
let label = arg.span;
|
||||||
let method = diag_item.as_str().strip_prefix("str_").unwrap();
|
let method = diag_item.as_str().strip_prefix("str_").unwrap();
|
||||||
cx.emit_spanned_lint(INVALID_FROM_UTF8_UNCHECKED, expr.span, InvalidFromUtf8UncheckedDiag {
|
let method = format!("std::str::{method}");
|
||||||
method: format!("std::str::{method}"),
|
let valid_up_to = utf8_error.valid_up_to();
|
||||||
valid_up_to: utf8_error.valid_up_to(),
|
let is_unchecked_variant = diag_item.as_str().contains("unchecked");
|
||||||
label: arg.span,
|
|
||||||
})
|
cx.emit_spanned_lint(
|
||||||
|
if is_unchecked_variant { INVALID_FROM_UTF8_UNCHECKED } else { INVALID_FROM_UTF8 },
|
||||||
|
expr.span,
|
||||||
|
if is_unchecked_variant {
|
||||||
|
InvalidFromUtf8Diag::Unchecked { method, valid_up_to, label }
|
||||||
|
} else {
|
||||||
|
InvalidFromUtf8Diag::Checked { method, valid_up_to, label }
|
||||||
|
}
|
||||||
|
)
|
||||||
};
|
};
|
||||||
|
|
||||||
match &arg.kind {
|
match &arg.kind {
|
||||||
|
|
|
@ -701,12 +701,21 @@ pub struct ForgetCopyDiag<'a> {
|
||||||
|
|
||||||
// invalid_from_utf8.rs
|
// invalid_from_utf8.rs
|
||||||
#[derive(LintDiagnostic)]
|
#[derive(LintDiagnostic)]
|
||||||
#[diag(lint_invalid_from_utf8_unchecked)]
|
pub enum InvalidFromUtf8Diag {
|
||||||
pub struct InvalidFromUtf8UncheckedDiag {
|
#[diag(lint_invalid_from_utf8_unchecked)]
|
||||||
pub method: String,
|
Unchecked {
|
||||||
pub valid_up_to: usize,
|
method: String,
|
||||||
|
valid_up_to: usize,
|
||||||
#[label]
|
#[label]
|
||||||
pub label: Span,
|
label: Span,
|
||||||
|
},
|
||||||
|
#[diag(lint_invalid_from_utf8_checked)]
|
||||||
|
Checked {
|
||||||
|
method: String,
|
||||||
|
valid_up_to: usize,
|
||||||
|
#[label]
|
||||||
|
label: Span,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
// hidden_unicode_codepoints.rs
|
// hidden_unicode_codepoints.rs
|
||||||
|
|
|
@ -1454,6 +1454,8 @@ symbols! {
|
||||||
stop_after_dataflow,
|
stop_after_dataflow,
|
||||||
store,
|
store,
|
||||||
str,
|
str,
|
||||||
|
str_from_utf8,
|
||||||
|
str_from_utf8_mut,
|
||||||
str_from_utf8_unchecked,
|
str_from_utf8_unchecked,
|
||||||
str_from_utf8_unchecked_mut,
|
str_from_utf8_unchecked_mut,
|
||||||
str_split_whitespace,
|
str_split_whitespace,
|
||||||
|
|
|
@ -84,6 +84,7 @@ use super::Utf8Error;
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
#[rustc_const_stable(feature = "const_str_from_utf8_shared", since = "1.63.0")]
|
#[rustc_const_stable(feature = "const_str_from_utf8_shared", since = "1.63.0")]
|
||||||
#[rustc_allow_const_fn_unstable(str_internals)]
|
#[rustc_allow_const_fn_unstable(str_internals)]
|
||||||
|
#[rustc_diagnostic_item = "str_from_utf8"]
|
||||||
pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
|
pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
|
||||||
// FIXME: This should use `?` again, once it's `const`
|
// FIXME: This should use `?` again, once it's `const`
|
||||||
match run_utf8_validation(v) {
|
match run_utf8_validation(v) {
|
||||||
|
@ -127,6 +128,7 @@ pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
|
||||||
/// errors that can be returned.
|
/// errors that can be returned.
|
||||||
#[stable(feature = "str_mut_extras", since = "1.20.0")]
|
#[stable(feature = "str_mut_extras", since = "1.20.0")]
|
||||||
#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "91006")]
|
#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "91006")]
|
||||||
|
#[rustc_diagnostic_item = "str_from_utf8_mut"]
|
||||||
pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
|
pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
|
||||||
// This should use `?` again, once it's `const`
|
// This should use `?` again, once it's `const`
|
||||||
match run_utf8_validation(v) {
|
match run_utf8_validation(v) {
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
#![feature(concat_bytes)]
|
#![feature(concat_bytes)]
|
||||||
#![warn(invalid_from_utf8_unchecked)]
|
#![warn(invalid_from_utf8_unchecked)]
|
||||||
|
#![warn(invalid_from_utf8)]
|
||||||
|
|
||||||
pub fn from_utf8_unchecked_mut() {
|
pub fn from_utf8_unchecked_mut() {
|
||||||
// Valid
|
// Valid
|
||||||
|
@ -46,4 +47,47 @@ pub fn from_utf8_unchecked() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn from_utf8_mut() {
|
||||||
|
// Valid
|
||||||
|
{
|
||||||
|
std::str::from_utf8_mut(&mut [99, 108, 105, 112, 112, 121]);
|
||||||
|
std::str::from_utf8_mut(&mut [b'c', b'l', b'i', b'p', b'p', b'y']);
|
||||||
|
|
||||||
|
let x = 0xa0;
|
||||||
|
std::str::from_utf8_mut(&mut [0xc0, x]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Invalid
|
||||||
|
{
|
||||||
|
std::str::from_utf8_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
|
||||||
|
//~^ WARN calls to `std::str::from_utf8_mut`
|
||||||
|
std::str::from_utf8_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
|
||||||
|
//~^ WARN calls to `std::str::from_utf8_mut`
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_utf8() {
|
||||||
|
// Valid
|
||||||
|
{
|
||||||
|
std::str::from_utf8(&[99, 108, 105, 112, 112, 121]);
|
||||||
|
std::str::from_utf8(&[b'c', b'l', b'i', b'p', b'p', b'y']);
|
||||||
|
std::str::from_utf8(b"clippy");
|
||||||
|
|
||||||
|
let x = 0xA0;
|
||||||
|
std::str::from_utf8(&[0xC0, x]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Invalid
|
||||||
|
{
|
||||||
|
std::str::from_utf8(&[99, 108, 130, 105, 112, 112, 121]);
|
||||||
|
//~^ WARN calls to `std::str::from_utf8`
|
||||||
|
std::str::from_utf8(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
|
||||||
|
//~^ WARN calls to `std::str::from_utf8`
|
||||||
|
std::str::from_utf8(b"cl\x82ippy");
|
||||||
|
//~^ WARN calls to `std::str::from_utf8`
|
||||||
|
std::str::from_utf8(concat_bytes!(b"cl", b"\x82ippy"));
|
||||||
|
//~^ WARN calls to `std::str::from_utf8`
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn main() {}
|
fn main() {}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior
|
warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior
|
||||||
--> $DIR/invalid_from_utf8.rs:18:9
|
--> $DIR/invalid_from_utf8.rs:19:9
|
||||||
|
|
|
|
||||||
LL | std::str::from_utf8_unchecked_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
|
LL | std::str::from_utf8_unchecked_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
|
||||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^
|
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^
|
||||||
|
@ -13,7 +13,7 @@ LL | #![warn(invalid_from_utf8_unchecked)]
|
||||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior
|
warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior
|
||||||
--> $DIR/invalid_from_utf8.rs:20:9
|
--> $DIR/invalid_from_utf8.rs:21:9
|
||||||
|
|
|
|
||||||
LL | std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
|
LL | std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
|
||||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^
|
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^
|
||||||
|
@ -21,7 +21,7 @@ LL | std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'\x82', b'i',
|
||||||
| the literal was valid UTF-8 up to the 2 bytes
|
| the literal was valid UTF-8 up to the 2 bytes
|
||||||
|
|
||||||
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
|
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
|
||||||
--> $DIR/invalid_from_utf8.rs:38:9
|
--> $DIR/invalid_from_utf8.rs:39:9
|
||||||
|
|
|
|
||||||
LL | std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]);
|
LL | std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]);
|
||||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-----------------------------------^
|
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-----------------------------------^
|
||||||
|
@ -29,7 +29,7 @@ LL | std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]);
|
||||||
| the literal was valid UTF-8 up to the 2 bytes
|
| the literal was valid UTF-8 up to the 2 bytes
|
||||||
|
|
||||||
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
|
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
|
||||||
--> $DIR/invalid_from_utf8.rs:40:9
|
--> $DIR/invalid_from_utf8.rs:41:9
|
||||||
|
|
|
|
||||||
LL | std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
|
LL | std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
|
||||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------------------^
|
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------------------^
|
||||||
|
@ -37,7 +37,7 @@ LL | std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'
|
||||||
| the literal was valid UTF-8 up to the 2 bytes
|
| the literal was valid UTF-8 up to the 2 bytes
|
||||||
|
|
||||||
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
|
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
|
||||||
--> $DIR/invalid_from_utf8.rs:42:9
|
--> $DIR/invalid_from_utf8.rs:43:9
|
||||||
|
|
|
|
||||||
LL | std::str::from_utf8_unchecked(b"cl\x82ippy");
|
LL | std::str::from_utf8_unchecked(b"cl\x82ippy");
|
||||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-------------^
|
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-------------^
|
||||||
|
@ -45,12 +45,66 @@ LL | std::str::from_utf8_unchecked(b"cl\x82ippy");
|
||||||
| the literal was valid UTF-8 up to the 2 bytes
|
| the literal was valid UTF-8 up to the 2 bytes
|
||||||
|
|
||||||
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
|
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
|
||||||
--> $DIR/invalid_from_utf8.rs:44:9
|
--> $DIR/invalid_from_utf8.rs:45:9
|
||||||
|
|
|
|
||||||
LL | std::str::from_utf8_unchecked(concat_bytes!(b"cl", b"\x82ippy"));
|
LL | std::str::from_utf8_unchecked(concat_bytes!(b"cl", b"\x82ippy"));
|
||||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------^
|
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------^
|
||||||
| |
|
| |
|
||||||
| the literal was valid UTF-8 up to the 2 bytes
|
| the literal was valid UTF-8 up to the 2 bytes
|
||||||
|
|
||||||
warning: 6 warnings emitted
|
warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
|
||||||
|
--> $DIR/invalid_from_utf8.rs:62:9
|
||||||
|
|
|
||||||
|
LL | std::str::from_utf8_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
|
||||||
|
| ^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^
|
||||||
|
| |
|
||||||
|
| the literal was valid UTF-8 up to the 2 bytes
|
||||||
|
|
|
||||||
|
note: the lint level is defined here
|
||||||
|
--> $DIR/invalid_from_utf8.rs:5:9
|
||||||
|
|
|
||||||
|
LL | #![warn(invalid_from_utf8)]
|
||||||
|
| ^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
|
||||||
|
--> $DIR/invalid_from_utf8.rs:64:9
|
||||||
|
|
|
||||||
|
LL | std::str::from_utf8_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
|
||||||
|
| ^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^
|
||||||
|
| |
|
||||||
|
| the literal was valid UTF-8 up to the 2 bytes
|
||||||
|
|
||||||
|
warning: calls to `std::str::from_utf8` with a invalid literal always return an error
|
||||||
|
--> $DIR/invalid_from_utf8.rs:82:9
|
||||||
|
|
|
||||||
|
LL | std::str::from_utf8(&[99, 108, 130, 105, 112, 112, 121]);
|
||||||
|
| ^^^^^^^^^^^^^^^^^^^^-----------------------------------^
|
||||||
|
| |
|
||||||
|
| the literal was valid UTF-8 up to the 2 bytes
|
||||||
|
|
||||||
|
warning: calls to `std::str::from_utf8` with a invalid literal always return an error
|
||||||
|
--> $DIR/invalid_from_utf8.rs:84:9
|
||||||
|
|
|
||||||
|
LL | std::str::from_utf8(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
|
||||||
|
| ^^^^^^^^^^^^^^^^^^^^----------------------------------------------^
|
||||||
|
| |
|
||||||
|
| the literal was valid UTF-8 up to the 2 bytes
|
||||||
|
|
||||||
|
warning: calls to `std::str::from_utf8` with a invalid literal always return an error
|
||||||
|
--> $DIR/invalid_from_utf8.rs:86:9
|
||||||
|
|
|
||||||
|
LL | std::str::from_utf8(b"cl\x82ippy");
|
||||||
|
| ^^^^^^^^^^^^^^^^^^^^-------------^
|
||||||
|
| |
|
||||||
|
| the literal was valid UTF-8 up to the 2 bytes
|
||||||
|
|
||||||
|
warning: calls to `std::str::from_utf8` with a invalid literal always return an error
|
||||||
|
--> $DIR/invalid_from_utf8.rs:88:9
|
||||||
|
|
|
||||||
|
LL | std::str::from_utf8(concat_bytes!(b"cl", b"\x82ippy"));
|
||||||
|
| ^^^^^^^^^^^^^^^^^^^^---------------------------------^
|
||||||
|
| |
|
||||||
|
| the literal was valid UTF-8 up to the 2 bytes
|
||||||
|
|
||||||
|
warning: 12 warnings emitted
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue