1
Fork 0

Auto merge of #71487 - rcoh:71471-shebang, r=petrochenkov

Fix bug in shebang handling

Shebang handling was too agressive in stripping out the first line in cases where it is actually _not_ a shebang, but instead, valid rust (#70528). This is a second attempt at resolving this issue (the first attempt was reverted, for, among other reasons, causing an ICE in certain cases (#71372, #71471).

The behavior is now codified by a number of UI tests, but simply:
For the first line to be a shebang, the following must all be true:
1. The line must start with `#!`
2. The line must contain a non-whitespace character after `#!`
3. The next character in the file, ignoring comments & whitespace must not be `[`

I believe this is a strict superset of what we used to allow, so perhaps a crater run is unnecessary, but probably not a terrible idea.

Fixes #70528
This commit is contained in:
bors 2020-05-26 01:43:40 +00:00
commit 9eedd138ee
13 changed files with 154 additions and 7 deletions

View file

@ -236,16 +236,28 @@ pub enum Base {
} }
/// `rustc` allows files to have a shebang, e.g. "#!/usr/bin/rustrun", /// `rustc` allows files to have a shebang, e.g. "#!/usr/bin/rustrun",
/// but shebang isn't a part of rust syntax, so this function /// but shebang isn't a part of rust syntax.
/// skips the line if it starts with a shebang ("#!").
/// Line won't be skipped if it represents a valid Rust syntax
/// (e.g. "#![deny(missing_docs)]").
pub fn strip_shebang(input: &str) -> Option<usize> { pub fn strip_shebang(input: &str) -> Option<usize> {
debug_assert!(!input.is_empty()); let first_line = input.lines().next()?;
if !input.starts_with("#!") || input.starts_with("#![") { // A shebang is intentionally loosely defined as `#! [non whitespace]` on the first line.
let could_be_shebang =
first_line.starts_with("#!") && first_line[2..].contains(|c| !is_whitespace(c));
if !could_be_shebang {
return None; return None;
} }
Some(input.find('\n').unwrap_or(input.len())) let non_whitespace_tokens = tokenize(input).map(|tok| tok.kind).filter(|tok|
!matches!(tok, TokenKind::LineComment | TokenKind::BlockComment { .. } | TokenKind::Whitespace)
);
let prefix = [TokenKind::Pound, TokenKind::Not, TokenKind::OpenBracket];
let starts_with_attribute = non_whitespace_tokens.take(3).eq(prefix.iter().copied());
if starts_with_attribute {
// If the file starts with #![ then it's definitely not a shebang -- it couldn't be
// a rust program since a Rust program can't start with `[`
None
} else {
// It's a #!... and there isn't a `[` in sight, must be a shebang
Some(first_line.len())
}
} }
/// Parses the first token from the provided input string. /// Parses the first token from the provided input string.

View file

@ -145,4 +145,61 @@ mod tests {
}), }),
); );
} }
#[test]
fn test_valid_shebang() {
// https://github.com/rust-lang/rust/issues/70528
let input = "#!/usr/bin/rustrun\nlet x = 5;";
assert_eq!(strip_shebang(input), Some(18));
}
#[test]
fn test_invalid_shebang_valid_rust_syntax() {
// https://github.com/rust-lang/rust/issues/70528
let input = "#! [bad_attribute]";
assert_eq!(strip_shebang(input), None);
}
#[test]
fn test_shebang_second_line() {
// Because shebangs are interpreted by the kernel, they must be on the first line
let input = "\n#!/bin/bash";
assert_eq!(strip_shebang(input), None);
}
#[test]
fn test_shebang_space() {
let input = "#! /bin/bash";
assert_eq!(strip_shebang(input), Some(input.len()));
}
#[test]
fn test_shebang_empty_shebang() {
let input = "#! \n[attribute(foo)]";
assert_eq!(strip_shebang(input), None);
}
#[test]
fn test_invalid_shebang_comment() {
let input = "#!//bin/ami/a/comment\n[";
assert_eq!(strip_shebang(input), None)
}
#[test]
fn test_invalid_shebang_another_comment() {
let input = "#!/*bin/ami/a/comment*/\n[attribute";
assert_eq!(strip_shebang(input), None)
}
#[test]
fn test_shebang_valid_rust_after() {
let input = "#!/*bin/ami/a/comment*/\npub fn main() {}";
assert_eq!(strip_shebang(input), Some(23))
}
#[test]
fn test_shebang_followed_by_attrib() {
let input = "#!/bin/rust-scripts\n#![allow_unused(true)]";
assert_eq!(strip_shebang(input), Some(19));
}
} }

View file

@ -0,0 +1,2 @@
#!B //~ expected `[`, found `B`

View file

@ -0,0 +1,8 @@
error: expected `[`, found `B`
--> $DIR/issue-71471-ignore-tidy.rs:2:3
|
LL | #!B
| ^ expected `[`
error: aborting due to previous error

View file

@ -0,0 +1,7 @@
#!
[allow(unused_variables)]
// check-pass
fn main() {
let x = 5;
}

View file

@ -0,0 +1,5 @@
#![allow(unused_variables)]
// check-pass
fn main() {
let x = 5;
}

View file

@ -0,0 +1,9 @@
#!/usr/bin/env run-cargo-script
// check-pass
#![allow(unused_variables)]
fn main() {
let x = 5;
}

View file

@ -0,0 +1,6 @@
#!//bin/bash
// check-pass
fn main() {
println!("a valid shebang (that is also a rust comment)")
}

View file

@ -0,0 +1,6 @@
// something on the first line for tidy
#!/bin/bash //~ expected `[`, found `/`
fn main() {
println!("ok!");
}

View file

@ -0,0 +1,8 @@
error: expected `[`, found `/`
--> $DIR/shebang-must-start-file.rs:2:3
|
LL | #!/bin/bash
| ^ expected `[`
error: aborting due to previous error

View file

@ -0,0 +1,16 @@
#!//bin/bash
// This could not possibly be a shebang & also a valid rust file, since a Rust file
// can't start with `[`
/*
[ (mixing comments to also test that we ignore both types of comments)
*/
[allow(unused_variables)]
// check-pass
fn main() {
let x = 5;
}

View file

@ -0,0 +1,6 @@
#!/usr/bin/env run-cargo-script
// check-pass
fn main() {
println!("Hello World!");
}

View file

@ -174,6 +174,11 @@ pub fn check(path: &Path, bad: &mut bool) {
let can_contain = let can_contain =
contents.contains("// ignore-tidy-") || contents.contains("# ignore-tidy-"); contents.contains("// ignore-tidy-") || contents.contains("# ignore-tidy-");
// Enable testing ICE's that require specific (untidy)
// file formats easily eg. `issue-1234-ignore-tidy.rs`
if filename.contains("ignore-tidy") {
return;
}
let mut skip_cr = contains_ignore_directive(can_contain, &contents, "cr"); let mut skip_cr = contains_ignore_directive(can_contain, &contents, "cr");
let mut skip_undocumented_unsafe = let mut skip_undocumented_unsafe =
contains_ignore_directive(can_contain, &contents, "undocumented-unsafe"); contains_ignore_directive(can_contain, &contents, "undocumented-unsafe");