Point at invalid utf-8 span on user's source code
```
error: couldn't read `$DIR/not-utf8-bin-file.rs`: stream did not contain valid UTF-8
  --> $DIR/not-utf8-2.rs:6:5
   |
LL |     include!("not-utf8-bin-file.rs");
   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
   |
note: `[193]` is not valid utf-8
  --> $DIR/not-utf8-bin-file.rs:2:14
   |
LL |     let _ = "�|�␂!5�cc␕␂��";
   |              ^
   = note: this error originates in the macro `include` (in Nightly builds, run with -Z macro-backtrace for more info)
```
When we attempt to load a Rust source code file and hit an OS file failure, we retry reading the file as raw bytes. If that succeeds, we try to convert the bytes to UTF-8. If *that* fails, we provide additional context about *where* the file contains its first invalid UTF-8 byte.

Fix #76869.
parent 6a64e3b897
commit 57dd42d613
14 changed files with 88 additions and 20 deletions
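The mechanism only needs information that a failed UTF-8 conversion already carries: `Utf8Error::valid_up_to()` gives the byte offset of the first invalid byte, and `Utf8Error::error_len()` gives the length of the invalid sequence. Below is a minimal standalone sketch of that detection step (std-only Rust, not compiler code; the file name is only illustrative):

```rust
use std::path::Path;

/// Return the byte offset and length of the first invalid UTF-8 sequence
/// in a file, or `None` if the file is valid UTF-8.
fn first_invalid_utf8(path: &Path) -> std::io::Result<Option<(usize, usize)>> {
    // Read raw bytes first; this succeeds even when the contents are not UTF-8.
    let bytes = std::fs::read(path)?;
    match std::str::from_utf8(&bytes) {
        Ok(_) => Ok(None),
        // `valid_up_to` points at the first bad byte; `error_len` is `None`
        // when the input ends in the middle of a character.
        Err(e) => Ok(Some((e.valid_up_to(), e.error_len().unwrap_or(0)))),
    }
}

fn main() -> std::io::Result<()> {
    if let Some((offset, len)) = first_invalid_utf8(Path::new("not-utf8.bin"))? {
        eprintln!("invalid utf-8 at byte {offset} (sequence length {len})");
    }
    Ok(())
}
```

In the compiler, the same offset is turned into a `Span` on a lossily decoded copy of the file (see the new `utf8_error` helper in the diff below), so the diagnostic can point at the offending bytes rather than only naming the file.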
@@ -16,6 +16,7 @@
 #![feature(proc_macro_internals)]
 #![feature(proc_macro_quote)]
 #![feature(rustdoc_internals)]
+#![feature(string_from_utf8_lossy_owned)]
 #![feature(try_blocks)]
 #![warn(unreachable_pub)]
 // tidy-alphabetical-end
@@ -13,7 +13,7 @@ use rustc_expand::base::{
 use rustc_expand::module::DirOwnership;
 use rustc_lint_defs::BuiltinLintDiag;
 use rustc_parse::parser::{ForceCollect, Parser};
-use rustc_parse::{new_parser_from_file, unwrap_or_emit_fatal};
+use rustc_parse::{new_parser_from_file, unwrap_or_emit_fatal, utf8_error};
 use rustc_session::lint::builtin::INCOMPLETE_INCLUDE;
 use rustc_span::source_map::SourceMap;
 use rustc_span::{Pos, Span, Symbol};
@@ -209,9 +209,10 @@ pub(crate) fn expand_include_str(
                 let interned_src = Symbol::intern(src);
                 MacEager::expr(cx.expr_str(cx.with_def_site_ctxt(bsp), interned_src))
             }
-            Err(_) => {
-                let guar = cx.dcx().span_err(sp, format!("`{path}` wasn't a utf-8 file"));
-                DummyResult::any(sp, guar)
+            Err(utf8err) => {
+                let mut err = cx.dcx().struct_span_err(sp, format!("`{path}` wasn't a utf-8 file"));
+                utf8_error(cx.source_map(), path.as_str(), None, &mut err, utf8err, &bytes[..]);
+                DummyResult::any(sp, err.emit())
             }
         },
         Err(dummy) => dummy,
@@ -273,7 +274,7 @@ fn load_binary_file(
             .and_then(|path| path.into_os_string().into_string().ok());

         if let Some(new_path) = new_path {
-            err.span_suggestion(
+            err.span_suggestion_verbose(
                 path_span,
                 "there is a file with the same name in a different directory",
                 format!("\"{}\"", new_path.replace('\\', "/").escape_debug()),
@@ -11,18 +11,21 @@
 #![feature(if_let_guard)]
 #![feature(iter_intersperse)]
 #![feature(let_chains)]
+#![feature(string_from_utf8_lossy_owned)]
 #![warn(unreachable_pub)]
 // tidy-alphabetical-end

-use std::path::Path;
+use std::path::{Path, PathBuf};
+use std::str::Utf8Error;

 use rustc_ast as ast;
 use rustc_ast::tokenstream::TokenStream;
 use rustc_ast::{AttrItem, Attribute, MetaItemInner, token};
 use rustc_ast_pretty::pprust;
 use rustc_data_structures::sync::Lrc;
-use rustc_errors::{Diag, FatalError, PResult};
+use rustc_errors::{Diag, EmissionGuarantee, FatalError, PResult, pluralize};
 use rustc_session::parse::ParseSess;
+use rustc_span::source_map::SourceMap;
 use rustc_span::{FileName, SourceFile, Span};
 pub use unicode_normalization::UNICODE_VERSION as UNICODE_NORMALIZATION_VERSION;

@@ -73,9 +76,22 @@ pub fn new_parser_from_file<'a>(
     path: &Path,
     sp: Option<Span>,
 ) -> Result<Parser<'a>, Vec<Diag<'a>>> {
-    let source_file = psess.source_map().load_file(path).unwrap_or_else(|e| {
-        let msg = format!("couldn't read {}: {}", path.display(), e);
+    let sm = psess.source_map();
+    let source_file = sm.load_file(path).unwrap_or_else(|e| {
+        let msg = format!("couldn't read `{}`: {}", path.display(), e);
         let mut err = psess.dcx().struct_fatal(msg);
+        if let Ok(contents) = std::fs::read(path)
+            && let Err(utf8err) = String::from_utf8(contents.clone())
+        {
+            utf8_error(
+                sm,
+                &path.display().to_string(),
+                sp,
+                &mut err,
+                utf8err.utf8_error(),
+                &contents,
+            );
+        }
         if let Some(sp) = sp {
             err.span(sp);
         }
@@ -84,6 +100,49 @@ pub fn new_parser_from_file<'a>(
     new_parser_from_source_file(psess, source_file)
 }

+pub fn utf8_error<E: EmissionGuarantee>(
+    sm: &SourceMap,
+    path: &str,
+    sp: Option<Span>,
+    err: &mut Diag<'_, E>,
+    utf8err: Utf8Error,
+    contents: &[u8],
+) {
+    // The file exists, but it wasn't valid UTF-8.
+    let start = utf8err.valid_up_to();
+    let note = format!("invalid utf-8 at byte `{start}`");
+    let msg = if let Some(len) = utf8err.error_len() {
+        format!(
+            "byte{s} `{bytes}` {are} not valid utf-8",
+            bytes = if len == 1 {
+                format!("{:?}", contents[start])
+            } else {
+                format!("{:?}", &contents[start..start + len])
+            },
+            s = pluralize!(len),
+            are = if len == 1 { "is" } else { "are" },
+        )
+    } else {
+        note.clone()
+    };
+    let contents = String::from_utf8_lossy(contents).to_string();
+    let source = sm.new_source_file(PathBuf::from(path).into(), contents);
+    let span = Span::with_root_ctxt(
+        source.normalized_byte_pos(start as u32),
+        source.normalized_byte_pos(start as u32),
+    );
+    if span.is_dummy() {
+        err.note(note);
+    } else {
+        if sp.is_some() {
+            err.span_note(span, msg);
+        } else {
+            err.span(span);
+            err.span_label(span, msg);
+        }
+    }
+}
+
 /// Given a session and a `source_file`, return a parser. Returns any buffered errors from lexing
 /// the initial token stream.
 fn new_parser_from_source_file(
@@ -101,6 +101,8 @@ pub fn load_errors(testfile: &Path, revision: Option<&str>) -> Vec<Error> {

     rdr.lines()
         .enumerate()
+        // We want to ignore utf-8 failures in tests during collection of annotations.
+        .filter(|(_, line)| line.is_ok())
         .filter_map(|(line_num, line)| {
             parse_expected(last_nonfollow_error, line_num + 1, &line.unwrap(), revision).map(
                 |(which, error)| {
@@ -58,7 +58,7 @@ pub fn check(tests_path: impl AsRef<Path>, bad: &mut bool) {

         let mut expected_revisions = BTreeSet::new();

-        let contents = std::fs::read_to_string(test).unwrap();
+        let Ok(contents) = std::fs::read_to_string(test) else { continue };

         // Collect directives.
         iter_header(&contents, &mut |HeaderLine { revision, directive, .. }| {
@@ -3,5 +3,5 @@
 //@ reference: input.encoding.invalid

 fn foo() {
-    include!("not-utf8.bin")
+    include!("not-utf8.bin");
 }
@@ -1,9 +1,14 @@
-error: couldn't read $DIR/not-utf8.bin: stream did not contain valid UTF-8
+error: couldn't read `$DIR/not-utf8.bin`: stream did not contain valid UTF-8
   --> $DIR/not-utf8.rs:6:5
    |
-LL |     include!("not-utf8.bin")
+LL |     include!("not-utf8.bin");
    |     ^^^^^^^^^^^^^^^^^^^^^^^^
    |
+note: byte `193` is not valid utf-8
+  --> $DIR/not-utf8.bin:1:1
+   |
+LL | �|�␂!5�cc␕␂��Ӻi��WWj�ȥ�'�}�␒�J�ȉ��W�␞O�@����␜w�V���LO����␔[ ␃_�'���SQ�~ذ��ų&��- ��lN~��!@␌ _#��kQ��h�␝�:�...
+   | ^
    = note: this error originates in the macro `include` (in Nightly builds, run with -Z macro-backtrace for more info)

 error: aborting due to 1 previous error
@@ -1,4 +1,4 @@
-//@ normalize-stderr: "\.:.*\(" -> ".: $$ACCESS_DENIED_MSG ("
+//@ normalize-stderr: "\.`:.*\(" -> ".`: $$ACCESS_DENIED_MSG ("
 //@ normalize-stderr: "os error \d+" -> "os error $$ACCESS_DENIED_CODE"

 #[path = "."]
@@ -1,4 +1,4 @@
-error: couldn't read $DIR/.: $ACCESS_DENIED_MSG (os error $ACCESS_DENIED_CODE)
+error: couldn't read `$DIR/.`: $ACCESS_DENIED_MSG (os error $ACCESS_DENIED_CODE)
   --> $DIR/path-no-file-name.rs:5:1
    |
 LL | mod m;
@@ -1,4 +1,4 @@
-//@ normalize-stderr: "parser:.*\(" -> "parser: $$ACCESS_DENIED_MSG ("
+//@ normalize-stderr: "parser`:.*\(" -> "parser`: $$ACCESS_DENIED_MSG ("
 //@ normalize-stderr: "os error \d+" -> "os error $$ACCESS_DENIED_CODE"

 #[path = "../parser"]
@@ -1,4 +1,4 @@
-error: couldn't read $DIR/../parser: $ACCESS_DENIED_MSG (os error $ACCESS_DENIED_CODE)
+error: couldn't read `$DIR/../parser`: $ACCESS_DENIED_MSG (os error $ACCESS_DENIED_CODE)
   --> $DIR/issue-5806.rs:5:1
    |
 LL | mod foo;
@@ -1,4 +1,4 @@
-//@ normalize-stderr: "not_a_real_file.rs:.*\(" -> "not_a_real_file.rs: $$FILE_NOT_FOUND_MSG ("
+//@ normalize-stderr: "not_a_real_file.rs`:.*\(" -> "not_a_real_file.rs`: $$FILE_NOT_FOUND_MSG ("

 #[path = "not_a_real_file.rs"]
 mod m; //~ ERROR not_a_real_file.rs
@@ -1,4 +1,4 @@
-error: couldn't read $DIR/not_a_real_file.rs: $FILE_NOT_FOUND_MSG (os error 2)
+error: couldn't read `$DIR/not_a_real_file.rs`: $FILE_NOT_FOUND_MSG (os error 2)
   --> $DIR/mod_file_with_path_attr.rs:4:1
    |
 LL | mod m;
@@ -1,4 +1,4 @@
-error: couldn't read $DIR/lol: No such file or directory (os error 2)
+error: couldn't read `$DIR/lol`: No such file or directory (os error 2)
   --> $DIR/staged-api-invalid-path-108697.rs:8:1
    |
 LL | mod foo;