From df9bd80d74b72aacf336d4f1a4e44ddaff2757ba Mon Sep 17 00:00:00 2001 From: Deadbeef Date: Sun, 16 Jul 2023 18:59:05 +0000 Subject: [PATCH] reimplement C string literals --- compiler/rustc_lexer/src/cursor.rs | 4 ++ compiler/rustc_lexer/src/lib.rs | 7 ++++ compiler/rustc_parse/src/lexer/mod.rs | 26 +++++++++++- .../rfcs/rfc-3348-c-string-literals/basic.rs | 3 +- .../rfc-3348-c-string-literals/basic.stderr | 25 ------------ .../rfc-3348-c-string-literals/gate.stderr | 31 +++++--------- .../rfc-3348-c-string-literals/no-nuls.rs | Bin 760 -> 623 bytes .../rfc-3348-c-string-literals/no-nuls.stderr | Bin 4477 -> 674 bytes .../rfc-3348-c-string-literals/non-ascii.rs | 3 +- .../non-ascii.stderr | 38 ------------------ 10 files changed, 48 insertions(+), 89 deletions(-) delete mode 100644 tests/ui/rfcs/rfc-3348-c-string-literals/basic.stderr delete mode 100644 tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.stderr diff --git a/compiler/rustc_lexer/src/cursor.rs b/compiler/rustc_lexer/src/cursor.rs index eceef59802e..aba7f95487e 100644 --- a/compiler/rustc_lexer/src/cursor.rs +++ b/compiler/rustc_lexer/src/cursor.rs @@ -24,6 +24,10 @@ impl<'a> Cursor<'a> { } } + pub fn as_str(&self) -> &'a str { + self.chars.as_str() + } + /// Returns the last eaten symbol (or `'\0'` in release builds). /// (For debug assertions only.) pub(crate) fn prev(&self) -> char { diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 29335a8c0f4..d511d2b1280 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -367,6 +367,13 @@ impl Cursor<'_> { Some(|terminated| Byte { terminated }), ), + // c-string literal, raw c-string literal or identifier. + 'c' => self.c_or_byte_string( + |terminated| CStr { terminated }, + |n_hashes| RawCStr { n_hashes }, + None, + ), + // Identifier (this should be checked after other variant that can // start as identifier). c if is_id_start(c) => self.ident_or_unknown_prefix(), diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index c6e6b46e455..cfcc2ec42fa 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -9,8 +9,8 @@ use rustc_ast::tokenstream::TokenStream; use rustc_ast::util::unicode::contains_text_flow_control_chars; use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey}; use rustc_lexer::unescape::{self, EscapeError, Mode}; -use rustc_lexer::Cursor; use rustc_lexer::{Base, DocStyle, RawStrError}; +use rustc_lexer::{Cursor, LiteralKind}; use rustc_session::lint::builtin::{ RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT, }; @@ -118,6 +118,7 @@ impl<'a> StringReader<'a> { let mut swallow_next_invalid = 0; // Skip trivial (whitespace & comments) tokens loop { + let str_before = self.cursor.as_str(); let token = self.cursor.advance_token(); let start = self.pos; self.pos = self.pos + BytePos(token.len); @@ -203,6 +204,29 @@ impl<'a> StringReader<'a> { .push(span); token::Ident(sym, false) } + // split up (raw) c string literals to an ident and a string literal when edition < 2021. + rustc_lexer::TokenKind::Literal { + kind: kind @ (LiteralKind::CStr { .. } | LiteralKind::RawCStr { .. }), + suffix_start: _, + } if !self.mk_sp(start, self.pos).edition().at_least_rust_2021() => { + let prefix_len = match kind { + LiteralKind::CStr { .. } => 1, + LiteralKind::RawCStr { .. } => 2, + _ => unreachable!(), + }; + + // reset the state so that only the prefix ("c" or "cr") + // was consumed. + let lit_start = start + BytePos(prefix_len); + self.pos = lit_start; + self.cursor = Cursor::new(&str_before[prefix_len as usize..]); + + self.report_unknown_prefix(start); + let sym = nfc_normalize(self.str_from(start)); + let prefix_span = self.mk_sp(start, lit_start); + self.sess.symbol_gallery.insert(sym, prefix_span); + return (Token::new(token::Ident(sym, false), prefix_span), preceded_by_whitespace); + } rustc_lexer::TokenKind::Literal { kind, suffix_start } => { let suffix_start = start + BytePos(suffix_start); let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind); diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs index 3fc5fd481ea..5037396000b 100644 --- a/tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs +++ b/tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs @@ -1,5 +1,4 @@ -// FIXME(c_str_literals): This should be `run-pass` -// known-bug: #113333 +// run-pass // edition: 2021 #![feature(c_str_literals)] diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/basic.stderr b/tests/ui/rfcs/rfc-3348-c-string-literals/basic.stderr deleted file mode 100644 index 571c319d8c5..00000000000 --- a/tests/ui/rfcs/rfc-3348-c-string-literals/basic.stderr +++ /dev/null @@ -1,25 +0,0 @@ -error: prefix `c` is unknown - --> $DIR/basic.rs:8:27 - | -LL | assert_eq!(b"test\0", c"test".to_bytes_with_nul()); - | ^ unknown prefix - | - = note: prefixed identifiers and literals are reserved since Rust 2021 -help: consider inserting whitespace here - | -LL | assert_eq!(b"test\0", c "test".to_bytes_with_nul()); - | + - -error: no rules expected the token `"test"` - --> $DIR/basic.rs:8:28 - | -LL | assert_eq!(b"test\0", c"test".to_bytes_with_nul()); - | -^^^^^ - | | - | no rules expected this token in macro call - | help: missing comma here - | - = note: while trying to match sequence start - -error: aborting due to 2 previous errors - diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr b/tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr index 8de36ca4a6e..ea666e43308 100644 --- a/tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr +++ b/tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr @@ -1,32 +1,21 @@ -error: prefix `c` is unknown +error[E0658]: `c".."` literals are experimental --> $DIR/gate.rs:10:5 | LL | c"foo"; - | ^ unknown prefix + | ^^^^^^ | - = note: prefixed identifiers and literals are reserved since Rust 2021 -help: consider inserting whitespace here - | -LL | c "foo"; - | + + = note: see issue #105723 for more information + = help: add `#![feature(c_str_literals)]` to the crate attributes to enable -error: prefix `c` is unknown +error[E0658]: `c".."` literals are experimental --> $DIR/gate.rs:13:8 | LL | m!(c"test"); - | ^ unknown prefix + | ^^^^^^^ | - = note: prefixed identifiers and literals are reserved since Rust 2021 -help: consider inserting whitespace here - | -LL | m!(c "test"); - | + + = note: see issue #105723 for more information + = help: add `#![feature(c_str_literals)]` to the crate attributes to enable -error: expected one of `!`, `.`, `::`, `;`, `?`, `{`, `}`, or an operator, found `"foo"` - --> $DIR/gate.rs:10:6 - | -LL | c"foo"; - | ^^^^^ expected one of 8 possible tokens - -error: aborting due to 3 previous errors +error: aborting due to 2 previous errors +For more information about this error, try `rustc --explain E0658`. diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs index 96945f125da71a9097f6f26c75a328f097448568..369173e23184e4537fc5fb34dbe716a984667c07 100644 GIT binary patch delta 208 zcmeyt`krN?m=9-NoPujmkbjWEWNSugO@*AylGLKaoMJ961qB6#WTlwWY6F8>C2KHO nUmr!a3WjP1>G<`$GxLZeedMMI^e z#Hyr3Q%h69)=r_i)|#u9i%U5=IW1ko&Cw^=RWlZp21-(kOR%K@w0KK~yFFP6DSVP4 megeld*mgw?a5~b2BqW3oBn_YoGbo{pfXzk|1lgepay;g#a8@WTDay=CSIEgMNi9muDON}#|q zFHtBiEhxw@DoIV@Qc%#%h$~-%_-I^Dz-ASGUrlIP^jVZ@lmKz0D)ws z7y~71upo%5pr8;J$HhepyDhQVU0Q8mP>XJVTpX?3U}%KJ5e#Vd)5iOTrdX_wsW89@ veVX~*0@DFSN;o~An3P{s0!m~lrKt)f`3j~A1x2Z4nfax~3gFaH%*6!&$AQVP literal 4477 zcmYc-D#|aiQYa`&P0OrMNJvgl$ShVU&CAZqFVEvrP|(%2Q&4g74ARfb*Uc-fn=o^10`#)Ac(6F2QvX`E?5Rc*ec}Zm!!h1OHEP8Oi9fv z$xO>kEh<(>%u7+o$t+1NO3W!%NGwWKC`v6(EhyV&d)2(Oi3+L$jmEFEh@>(OIIk*$Sg@ME=WvHRmey!N(EVo>`VnTcPeOeagh-K zh6dO|ptRb+pmvCa0)|`34GCi`A;B;-0s+ODKyxX!P$(jzL@>0( z42B{llmLz!tYt!x0;)q5umu7UVPJ$M4A6?3!5RcqDi)1Li$!P+X>5p@A5c@-;7tlB zxt7elU~GiN|0AzhG{#apjcM3^ zauFXF7Z<01?i>q+g8bs*%%q%Dg_8X2)VyMNw;Ww1jT~-dX+>?9Cnn`X#w1coLB3Kj UGzSfalx60Z7At_q9E!QP0I=mZZ2$lO diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs index 066505c23df..380445d7a7f 100644 --- a/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs +++ b/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs @@ -1,5 +1,4 @@ -// FIXME(c_str_literals): This should be `run-pass` -// known-bug: #113333 +// run-pass // edition: 2021 #![feature(c_str_literals)] diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.stderr b/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.stderr deleted file mode 100644 index 47361fb61d2..00000000000 --- a/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.stderr +++ /dev/null @@ -1,38 +0,0 @@ -error: prefix `c` is unknown - --> $DIR/non-ascii.rs:9:9 - | -LL | c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(), - | ^ unknown prefix - | - = note: prefixed identifiers and literals are reserved since Rust 2021 -help: consider inserting whitespace here - | -LL | c "\xEF\x80🦀\u{1F980}".to_bytes_with_nul(), - | + - -error: out of range hex escape - --> $DIR/non-ascii.rs:9:11 - | -LL | c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(), - | ^^^^ must be a character in the range [\x00-\x7f] - -error: out of range hex escape - --> $DIR/non-ascii.rs:9:15 - | -LL | c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(), - | ^^^^ must be a character in the range [\x00-\x7f] - -error: no rules expected the token `"\xEF\x80🦀\u{1F980}"` - --> $DIR/non-ascii.rs:9:10 - | -LL | c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(), - | -^^^^^^^^^^^^^^^^^^^^ - | | - | no rules expected this token in macro call - | help: missing comma here - | -note: while trying to match `,` - --> $SRC_DIR/core/src/macros/mod.rs:LL:COL - -error: aborting due to 4 previous errors -