Auto merge of #71487 - rcoh:71471-shebang, r=petrochenkov

Fix bug in shebang handling Shebang handling was too agressive in stripping out the first line in cases where it is actually _not_ a shebang, but instead, valid rust (#70528). This is a second attempt at resolving this issue (the first attempt was reverted, for, among other reasons, causing an ICE in certain cases (#71372, #71471). The behavior is now codified by a number of UI tests, but simply: For the first line to be a shebang, the following must all be true: 1. The line must start with `#!` 2. The line must contain a non-whitespace character after `#!` 3. The next character in the file, ignoring comments & whitespace must not be `[` I believe this is a strict superset of what we used to allow, so perhaps a crater run is unnecessary, but probably not a terrible idea. Fixes #70528
2020-05-26 01:43:40 +00:00 · 2020-05-26 01:43:40 +00:00 · 9eedd138ee
commit 9eedd138ee
parent 698c5c6d95 a93d31603f
13 changed files with 154 additions and 7 deletions
--- a/src/librustc_lexer/src/lib.rs
+++ b/src/librustc_lexer/src/lib.rs
@ -236,16 +236,28 @@ pub enum Base {
 }
 /// `rustc` allows files to have a shebang, e.g. "#!/usr/bin/rustrun",
-/// but shebang isn't a part of rust syntax, so this function
+/// but shebang isn't a part of rust syntax.
 /// skips the line if it starts with a shebang ("#!").
 /// Line won't be skipped if it represents a valid Rust syntax
 /// (e.g. "#![deny(missing_docs)]").
 pub fn strip_shebang(input: &str) -> Option<usize> {
-    debug_assert!(!input.is_empty());
+    let first_line = input.lines().next()?;
-    if !input.starts_with("#!") || input.starts_with("#![") {
+    // A shebang is intentionally loosely defined as `#! [non whitespace]` on the first line.
    let could_be_shebang =
        first_line.starts_with("#!") && first_line[2..].contains(|c| !is_whitespace(c));
    if !could_be_shebang {
        return None;
    }
-    Some(input.find('\n').unwrap_or(input.len()))
+    let non_whitespace_tokens = tokenize(input).map(|tok| tok.kind).filter(|tok|
        !matches!(tok, TokenKind::LineComment | TokenKind::BlockComment { .. } | TokenKind::Whitespace)
    );
    let prefix = [TokenKind::Pound, TokenKind::Not, TokenKind::OpenBracket];
    let starts_with_attribute = non_whitespace_tokens.take(3).eq(prefix.iter().copied());
    if starts_with_attribute {
        // If the file starts with #![ then it's definitely not a shebang -- it couldn't be
        // a rust program since a Rust program can't start with `[`
        None
    } else {
        // It's a #!... and there isn't a `[` in sight, must be a shebang
        Some(first_line.len())
    }
 }
 /// Parses the first token from the provided input string.
--- a/src/librustc_lexer/src/tests.rs
+++ b/src/librustc_lexer/src/tests.rs
@ -145,4 +145,61 @@ mod tests {
            }),
        );
    }
    #[test]
    fn test_valid_shebang() {
        // https://github.com/rust-lang/rust/issues/70528
        let input = "#!/usr/bin/rustrun\nlet x = 5;";
        assert_eq!(strip_shebang(input), Some(18));
    }
    #[test]
    fn test_invalid_shebang_valid_rust_syntax() {
        // https://github.com/rust-lang/rust/issues/70528
        let input = "#!    [bad_attribute]";
        assert_eq!(strip_shebang(input), None);
    }
    #[test]
    fn test_shebang_second_line() {
        // Because shebangs are interpreted by the kernel, they must be on the first line
        let input = "\n#!/bin/bash";
        assert_eq!(strip_shebang(input), None);
    }
    #[test]
    fn test_shebang_space() {
        let input = "#!    /bin/bash";
        assert_eq!(strip_shebang(input), Some(input.len()));
    }
    #[test]
    fn test_shebang_empty_shebang() {
        let input = "#!    \n[attribute(foo)]";
        assert_eq!(strip_shebang(input), None);
    }
    #[test]
    fn test_invalid_shebang_comment() {
        let input = "#!//bin/ami/a/comment\n[";
        assert_eq!(strip_shebang(input), None)
    }
    #[test]
    fn test_invalid_shebang_another_comment() {
        let input = "#!/*bin/ami/a/comment*/\n[attribute";
        assert_eq!(strip_shebang(input), None)
    }
    #[test]
    fn test_shebang_valid_rust_after() {
        let input = "#!/*bin/ami/a/comment*/\npub fn main() {}";
        assert_eq!(strip_shebang(input), Some(23))
    }
    #[test]
    fn test_shebang_followed_by_attrib() {
        let input = "#!/bin/rust-scripts\n#![allow_unused(true)]";
        assert_eq!(strip_shebang(input), Some(19));
    }
 }
--- a/src/test/ui/parser/shebang/issue-71471-ignore-tidy.rs
+++ b/src/test/ui/parser/shebang/issue-71471-ignore-tidy.rs
@ -0,0 +1,2 @@
 #!B //~ expected `[`, found `B`
--- a/src/test/ui/parser/shebang/issue-71471-ignore-tidy.stderr
+++ b/src/test/ui/parser/shebang/issue-71471-ignore-tidy.stderr
@ -0,0 +1,8 @@
 error: expected `[`, found `B`
  --> $DIR/issue-71471-ignore-tidy.rs:2:3
   |
 LL | #!B
   |   ^ expected `[`
 error: aborting due to previous error
--- a/src/test/ui/parser/shebang/multiline-attrib.rs
+++ b/src/test/ui/parser/shebang/multiline-attrib.rs
@ -0,0 +1,7 @@
 #!
 [allow(unused_variables)]
 // check-pass
 fn main() {
    let x = 5;
 }
--- a/src/test/ui/parser/shebang/regular-attrib.rs
+++ b/src/test/ui/parser/shebang/regular-attrib.rs
@ -0,0 +1,5 @@
 #![allow(unused_variables)]
 // check-pass
 fn main() {
    let x = 5;
 }
--- a/src/test/ui/parser/shebang/shebang-and-attrib.rs
+++ b/src/test/ui/parser/shebang/shebang-and-attrib.rs
@ -0,0 +1,9 @@
 #!/usr/bin/env run-cargo-script
 // check-pass
 #![allow(unused_variables)]
 fn main() {
    let x = 5;
 }
--- a/src/test/ui/parser/shebang/shebang-comment.rs
+++ b/src/test/ui/parser/shebang/shebang-comment.rs
@ -0,0 +1,6 @@
 #!//bin/bash
 // check-pass
 fn main() {
    println!("a valid shebang (that is also a rust comment)")
 }
--- a/src/test/ui/parser/shebang/shebang-must-start-file.rs
+++ b/src/test/ui/parser/shebang/shebang-must-start-file.rs
@ -0,0 +1,6 @@
 // something on the first line for tidy
 #!/bin/bash  //~ expected `[`, found `/`
 fn main() {
    println!("ok!");
 }
--- a/src/test/ui/parser/shebang/shebang-must-start-file.stderr
+++ b/src/test/ui/parser/shebang/shebang-must-start-file.stderr
@ -0,0 +1,8 @@
 error: expected `[`, found `/`
  --> $DIR/shebang-must-start-file.rs:2:3
   |
 LL | #!/bin/bash
   |   ^ expected `[`
 error: aborting due to previous error
--- a/src/test/ui/parser/shebang/sneaky-attrib.rs
+++ b/src/test/ui/parser/shebang/sneaky-attrib.rs
@ -0,0 +1,16 @@
 #!//bin/bash
 // This could not possibly be a shebang & also a valid rust file, since a Rust file
 // can't start with `[`
 /*
    [ (mixing comments to also test that we ignore both types of comments)
 */
 [allow(unused_variables)]
 // check-pass
 fn main() {
    let x = 5;
 }
--- a/src/test/ui/parser/shebang/valid-shebang.rs
+++ b/src/test/ui/parser/shebang/valid-shebang.rs
@ -0,0 +1,6 @@
 #!/usr/bin/env run-cargo-script
 // check-pass
 fn main() {
    println!("Hello World!");
 }
--- a/src/tools/tidy/src/style.rs
+++ b/src/tools/tidy/src/style.rs
@ -174,6 +174,11 @@ pub fn check(path: &Path, bad: &mut bool) {
        let can_contain =
            contents.contains("// ignore-tidy-") || contents.contains("# ignore-tidy-");
        // Enable testing ICE's that require specific (untidy)
        // file formats easily eg. `issue-1234-ignore-tidy.rs`
        if filename.contains("ignore-tidy") {
            return;
        }
        let mut skip_cr = contains_ignore_directive(can_contain, &contents, "cr");
        let mut skip_undocumented_unsafe =
            contains_ignore_directive(can_contain, &contents, "undocumented-unsafe");