1
Fork 0

Reserve prefixed identifiers and string literals (RFC 3101)

This commit rejects any identifier immediately followed by
one of the three characters `"`, `'` or `#`, which is stricter
than RFC 3101 requires but may be necessary according
to the discussion at [Zulip].

[Zulip]: 238470099
This commit is contained in:
lrh2000 2021-05-16 11:10:05 +08:00
parent 831ae3c136
commit 8dee9bc8fc
5 changed files with 172 additions and 7 deletions

View file

@ -66,6 +66,8 @@ pub enum TokenKind {
Ident,
/// "r#ident"
RawIdent,
/// `foo#`, `foo'`, `foo"`. Note that the trailing character is not included.
BadPrefix,
/// "12_u8", "1.0e-40", "b"123"". See `LiteralKind` for more details.
Literal { kind: LiteralKind, suffix_start: usize },
/// "'a"
@ -323,7 +325,7 @@ impl Cursor<'_> {
let kind = RawStr { n_hashes, err };
Literal { kind, suffix_start }
}
_ => self.ident(),
_ => self.ident_or_bad_prefix(),
},
// Byte literal, byte string literal, raw byte string literal or identifier.
@ -358,12 +360,12 @@ impl Cursor<'_> {
let kind = RawByteStr { n_hashes, err };
Literal { kind, suffix_start }
}
_ => self.ident(),
_ => self.ident_or_bad_prefix(),
},
// Identifier (this should be checked after other variant that can
// start as identifier).
c if is_id_start(c) => self.ident(),
c if is_id_start(c) => self.ident_or_bad_prefix(),
// Numeric literal.
c @ '0'..='9' => {
@ -487,11 +489,16 @@ impl Cursor<'_> {
RawIdent
}
fn ident(&mut self) -> TokenKind {
fn ident_or_bad_prefix(&mut self) -> TokenKind {
debug_assert!(is_id_start(self.prev()));
// Start is already eaten, eat the rest of identifier.
self.eat_while(is_id_continue);
Ident
// Good prefixes must have been handled earlier, so if
// we see a prefix here, it is definitely a bad prefix.
match self.first() {
'#' | '"' | '\'' => BadPrefix,
_ => Ident,
}
}
fn number(&mut self, first_digit: char) -> LiteralKind {