Auto merge of #73596 - petrochenkov:shebang2, r=Mark-Simulacrum
rustc_lexer: Simplify shebang parsing once more. Fixes https://github.com/rust-lang/rust/issues/73250 (beta regression). Treat any line starting with `#!` as a shebang candidate, not only lines with something non-whitespace after the `#!`. This way we no longer need to define what `is_whitespace` means ([Linux shebang whitespace](https://github.com/torvalds/linux/blob/master/fs/binfmt_script.c), ASCII whitespace, Rust lexer whitespace, etc.), which is nice. This change makes some invalid Rust code valid (see the regression above), but still never interprets a fragment of valid Rust code as a shebang. (This PR also removes one duplicate test.)
This commit is contained in:
commit
dda8a7fde9
4 changed files with 21 additions and 20 deletions
|
@ -179,21 +179,18 @@ pub enum Base {
|
|||
/// but shebang isn't a part of rust syntax.
|
||||
pub fn strip_shebang(input: &str) -> Option<usize> {
|
||||
// Shebang must start with `#!` literally, without any preceding whitespace.
|
||||
if input.starts_with("#!") {
|
||||
let input_tail = &input[2..];
|
||||
// Shebang must have something non-whitespace after `#!` on the first line.
|
||||
let first_line_tail = input_tail.lines().next()?;
|
||||
if first_line_tail.contains(|c| !is_whitespace(c)) {
|
||||
// Ok, this is a shebang but if the next non-whitespace token is `[` or maybe
|
||||
// a doc comment (due to `TokenKind::(Line,Block)Comment` ambiguity at lexer level),
|
||||
// then it may be valid Rust code, so consider it Rust code.
|
||||
let next_non_whitespace_token = tokenize(input_tail).map(|tok| tok.kind).find(|tok|
|
||||
!matches!(tok, TokenKind::Whitespace | TokenKind::LineComment | TokenKind::BlockComment { .. })
|
||||
);
|
||||
if next_non_whitespace_token != Some(TokenKind::OpenBracket) {
|
||||
// No other choice than to consider this a shebang.
|
||||
return Some(2 + first_line_tail.len());
|
||||
}
|
||||
// For simplicity we consider any line starting with `#!` a shebang,
|
||||
// regardless of restrictions put on shebangs by specific platforms.
|
||||
if let Some(input_tail) = input.strip_prefix("#!") {
|
||||
// Ok, this is a shebang but if the next non-whitespace token is `[` or maybe
|
||||
// a doc comment (due to `TokenKind::(Line,Block)Comment` ambiguity at lexer level),
|
||||
// then it may be valid Rust code, so consider it Rust code.
|
||||
let next_non_whitespace_token = tokenize(input_tail).map(|tok| tok.kind).find(|tok|
|
||||
!matches!(tok, TokenKind::Whitespace | TokenKind::LineComment | TokenKind::BlockComment { .. })
|
||||
);
|
||||
if next_non_whitespace_token != Some(TokenKind::OpenBracket) {
|
||||
// No other choice than to consider this a shebang.
|
||||
return Some(2 + input_tail.lines().next().unwrap_or_default().len());
|
||||
}
|
||||
}
|
||||
None
|
||||
|
|
4
src/test/ui/parser/shebang/shebang-empty.rs
Normal file
4
src/test/ui/parser/shebang/shebang-empty.rs
Normal file
|
@ -0,0 +1,4 @@
|
|||
#!
|
||||
|
||||
// check-pass
|
||||
fn main() {}
|
5
src/test/ui/parser/shebang/shebang-space.rs
Normal file
5
src/test/ui/parser/shebang/shebang-space.rs
Normal file
|
@ -0,0 +1,5 @@
|
|||
#!
|
||||
|
||||
// check-pass
|
||||
// ignore-tidy-end-whitespace
|
||||
fn main() {}
|
|
@ -1,5 +0,0 @@
|
|||
#!/usr/bin/env rustx
|
||||
|
||||
// run-pass
|
||||
|
||||
pub fn main() { println!("Hello World"); }
|
Loading…
Add table
Add a link
Reference in a new issue