1
Fork 0

quote_expr macro: embed Ident using special encoding that preserves hygiene.

This adds support to `quote_expr!` and friends for round-trip hygienic
preservation of Ident.

Here are the pieces of the puzzle:

* adding a method for encoding Ident for re-reading into token tree.

* Support for reading such encoded Idents in the lexer.  Note that one
  must peek ahead for MOD_SEP after scan_embedded_hygienic_ident.

* To ensure that encoded Idents are only read when we are in the midst
  of expanding a `quote_expr` or similar, added a
  `read_embedded_ident` flag on `StringReader`.

* pprust support for encoding Idents as (uint,uint) pairs (for hygiene).
This commit is contained in:
Felix S. Klock II 2014-08-01 17:11:53 +02:00
parent 9d554212de
commit c3ce245ba6
5 changed files with 345 additions and 31 deletions

View file

@ -55,6 +55,12 @@ impl Ident {
pub fn as_str<'a>(&'a self) -> &'a str {
self.name.as_str()
}
/// Encodes this identifier as the string `\x00name_<N>,ctxt_<C>\x00`,
/// where `<N>` is `self.name.uint()` and `<C>` is `self.ctxt`, both
/// formatted as unsigned integers (`{:u}`).
pub fn encode_with_hygiene(&self) -> String {
format!("\x00name_{:u},ctxt_{:u}\x00",
self.name.uint(),
self.ctxt)
}
}
impl Show for Ident {

View file

@ -97,6 +97,15 @@ pub mod rt {
fn to_source(&self) -> String;
}
// FIXME (Issue #16472): This should go away after ToToken impls
// are revised to go directly to token-trees.
/// Extension of `ToSource` whose output string carries hygiene
/// information for identifiers.
trait ToSourceWithHygiene : ToSource {
// Takes a thing and generates a string containing Rust code
// for it, encoding Idents as special byte sequences to
// maintain hygiene across serialization and deserialization.
fn to_source_with_hygiene(&self) -> String;
}
macro_rules! impl_to_source(
(Gc<$t:ty>, $pp:ident) => (
impl ToSource for Gc<$t> {
@ -104,6 +113,11 @@ pub mod rt {
pprust::$pp(&**self)
}
}
impl ToSourceWithHygiene for Gc<$t> {
fn to_source_with_hygiene(&self) -> String {
pprust::with_hygiene::$pp(&**self)
}
}
);
($t:ty, $pp:ident) => (
impl ToSource for $t {
@ -111,6 +125,11 @@ pub mod rt {
pprust::$pp(self)
}
}
impl ToSourceWithHygiene for $t {
fn to_source_with_hygiene(&self) -> String {
pprust::with_hygiene::$pp(self)
}
}
);
)
@ -122,6 +141,15 @@ pub mod rt {
.to_string()
}
/// Renders every element of `xs` with `to_source_with_hygiene` and joins
/// the resulting strings, placing `sep` between neighbouring elements.
fn slice_to_source_with_hygiene<'a, T: ToSourceWithHygiene>(
    sep: &'static str, xs: &'a [T]) -> String {
    // Render each element first, then join the pieces in one step.
    let pieces: Vec<String> = xs.iter()
        .map(|item| item.to_source_with_hygiene())
        .collect();
    pieces.connect(sep).to_string()
}
macro_rules! impl_to_source_slice(
($t:ty, $sep:expr) => (
impl<'a> ToSource for &'a [$t] {
@ -129,6 +157,12 @@ pub mod rt {
slice_to_source($sep, *self)
}
}
impl<'a> ToSourceWithHygiene for &'a [$t] {
fn to_source_with_hygiene(&self) -> String {
slice_to_source_with_hygiene($sep, *self)
}
}
)
)
@ -138,6 +172,12 @@ pub mod rt {
}
}
// The hygienic source form of an identifier is whatever
// `encode_with_hygiene` returns for it.
impl ToSourceWithHygiene for ast::Ident {
fn to_source_with_hygiene(&self) -> String {
self.encode_with_hygiene()
}
}
impl_to_source!(ast::Ty, ty_to_string)
impl_to_source!(ast::Block, block_to_string)
impl_to_source!(ast::Arg, arg_to_string)
@ -156,6 +196,11 @@ pub mod rt {
pprust::attribute_to_string(&dummy_spanned(*self))
}
}
// Delegates to the plain `to_source` rendering; this impl applies no
// hygiene encoding of its own.
impl ToSourceWithHygiene for ast::Attribute_ {
fn to_source_with_hygiene(&self) -> String {
self.to_source()
}
}
impl<'a> ToSource for &'a str {
fn to_source(&self) -> String {
@ -164,12 +209,22 @@ pub mod rt {
pprust::lit_to_string(&lit)
}
}
// Delegates to the plain `to_source` rendering; this impl applies no
// hygiene encoding of its own.
impl<'a> ToSourceWithHygiene for &'a str {
fn to_source_with_hygiene(&self) -> String {
self.to_source()
}
}
// The unit value is rendered as the literal text `()`.
impl ToSource for () {
fn to_source(&self) -> String {
"()".to_string()
}
}
// Delegates to the plain `to_source` rendering; this impl applies no
// hygiene encoding of its own.
impl ToSourceWithHygiene for () {
fn to_source_with_hygiene(&self) -> String {
self.to_source()
}
}
impl ToSource for bool {
fn to_source(&self) -> String {
@ -177,6 +232,11 @@ pub mod rt {
pprust::lit_to_string(&lit)
}
}
// Delegates to the plain `to_source` rendering; this impl applies no
// hygiene encoding of its own.
impl ToSourceWithHygiene for bool {
fn to_source_with_hygiene(&self) -> String {
self.to_source()
}
}
impl ToSource for char {
fn to_source(&self) -> String {
@ -184,6 +244,11 @@ pub mod rt {
pprust::lit_to_string(&lit)
}
}
// Delegates to the plain `to_source` rendering; this impl applies no
// hygiene encoding of its own.
impl ToSourceWithHygiene for char {
fn to_source_with_hygiene(&self) -> String {
self.to_source()
}
}
macro_rules! impl_to_source_int(
(signed, $t:ty, $tag:ident) => (
@ -194,6 +259,11 @@ pub mod rt {
pprust::lit_to_string(&dummy_spanned(lit))
}
}
impl ToSourceWithHygiene for $t {
fn to_source_with_hygiene(&self) -> String {
self.to_source()
}
}
);
(unsigned, $t:ty, $tag:ident) => (
impl ToSource for $t {
@ -202,6 +272,11 @@ pub mod rt {
pprust::lit_to_string(&dummy_spanned(lit))
}
}
impl ToSourceWithHygiene for $t {
fn to_source_with_hygiene(&self) -> String {
self.to_source()
}
}
);
)
@ -223,7 +298,7 @@ pub mod rt {
($t:ty) => (
impl ToTokens for $t {
fn to_tokens(&self, cx: &ExtCtxt) -> Vec<TokenTree> {
cx.parse_tts(self.to_source())
cx.parse_tts_with_hygiene(self.to_source_with_hygiene())
}
}
)
@ -233,7 +308,7 @@ pub mod rt {
($t:ty) => (
impl<'a> ToTokens for $t {
fn to_tokens(&self, cx: &ExtCtxt) -> Vec<TokenTree> {
cx.parse_tts(self.to_source())
cx.parse_tts_with_hygiene(self.to_source_with_hygiene())
}
}
)
@ -275,6 +350,12 @@ pub mod rt {
fn parse_tts(&self, s: String) -> Vec<ast::TokenTree>;
}
/// Parsing helper that turns a source string into token trees; the
/// `ExtCtxt` implementation routes through `parse::with_hygiene`.
trait ExtParseUtilsWithHygiene {
// FIXME (Issue #16472): This should go away after ToToken impls
// are revised to go directly to token-trees.
fn parse_tts_with_hygiene(&self, s: String) -> Vec<ast::TokenTree>;
}
impl<'a> ExtParseUtils for ExtCtxt<'a> {
fn parse_item(&self, s: String) -> Gc<ast::Item> {
@ -315,6 +396,18 @@ pub mod rt {
}
}
impl<'a> ExtParseUtilsWithHygiene for ExtCtxt<'a> {
    /// Parses `s` into token trees through
    /// `parse::with_hygiene::parse_tts_from_source_str`, using this
    /// context's crate config and parse session. The source is labelled
    /// `<quote expansion>`.
    fn parse_tts_with_hygiene(&self, s: String) -> Vec<ast::TokenTree> {
        use parse::with_hygiene::parse_tts_from_source_str;
        let source_name = "<quote expansion>".to_string();
        let crate_cfg = self.cfg();
        let session = self.parse_sess();
        parse_tts_from_source_str(source_name, s, crate_cfg, session)
    }
}
}
pub fn expand_quote_tokens(cx: &mut ExtCtxt,

View file

@ -17,7 +17,9 @@ use parse::token;
use parse::token::{str_to_ident};
use std::char;
use std::fmt;
use std::mem::replace;
use std::num;
use std::rc::Rc;
use std::str;
@ -55,6 +57,11 @@ pub struct StringReader<'a> {
/* cached: */
pub peek_tok: token::Token,
pub peek_span: Span,
// FIXME (Issue #16472): This field should go away after ToToken impls
// are revised to go directly to token-trees.
/// Is \x00<name>,<ctxt>\x00 interpreted as an encoded ast::Ident?
read_embedded_ident: bool,
}
impl<'a> Reader for StringReader<'a> {
@ -106,6 +113,17 @@ impl<'a> Reader for TtReader<'a> {
}
}
// FIXME (Issue #16472): This function should go away after
// ToToken impls are revised to go directly to token-trees.
/// Creates a `StringReader` over `filemap` with `read_embedded_ident`
/// switched on, then advances it to its first token before returning it.
pub fn make_reader_with_embedded_idents<'b>(span_diagnostic: &'b SpanHandler,
                                            filemap: Rc<codemap::FileMap>)
                                            -> StringReader<'b> {
    let mut reader = StringReader::new_raw(span_diagnostic, filemap);
    // Must be set before the first token is produced.
    reader.read_embedded_ident = true;
    reader.advance_token();
    reader
}
impl<'a> StringReader<'a> {
/// For comments.rs, which hackily pokes into pos and curr
pub fn new_raw<'b>(span_diagnostic: &'b SpanHandler,
@ -120,6 +138,7 @@ impl<'a> StringReader<'a> {
/* dummy values; not read */
peek_tok: token::EOF,
peek_span: codemap::DUMMY_SP,
read_embedded_ident: false,
};
sr.bump();
sr
@ -512,6 +531,81 @@ impl<'a> StringReader<'a> {
})
}
// FIXME (Issue #16472): The scan_embedded_hygienic_ident function
// should go away after we revise the syntax::ext::quote::ToToken
// impls to go directly to token-trees instead of thing -> string
// -> token-trees. (The function is currently used to resolve
// Issues #15750 and #15962.)
//
// Since this function is only used for certain internal macros,
// and the functionality it provides is not exposed to end user
// programs, pnkfelix deliberately chose to write it in a way that
// favors rustc debugging effectiveness over runtime efficiency.
/// Scan through input of form \x00name_NNNNNN,ctxt_CCCCCCC\x00
/// where: `NNNNNN` is a string of characters forming an integer
/// (the name) and `CCCCCCC` is a string of characters forming an
/// integer (the ctxt), separated by a comma and delimited by a
/// `\x00` marker.
///
/// Fails (via `fail!`) if the input deviates from that form; returns
/// the `ast::Ident` built from the two decoded integers otherwise.
#[inline(never)]
fn scan_embedded_hygienic_ident(&mut self) -> ast::Ident {
// Consumes the current character if it equals `c`; otherwise fails
// with a message built from `described_c` and `where`.
fn bump_expecting_char<'a,D:fmt::Show>(r: &mut StringReader<'a>,
c: char,
described_c: D,
where: &str) {
match r.curr {
Some(r_c) if r_c == c => r.bump(),
Some(r_c) => fail!("expected {}, hit {}, {}", described_c, r_c, where),
None => fail!("expected {}, hit EOF, {}", described_c, where),
}
}
// context string appended to every failure message below
let where = "while scanning embedded hygienic ident";
// skip over the leading `\x00`
bump_expecting_char(self, '\x00', "nul-byte", where);
// skip over the "name_"
for c in "name_".chars() {
bump_expecting_char(self, c, c, where);
}
// remember where the digits start so they can be sliced out below
let start_bpos = self.last_pos;
let base = 10;
// find the integer representing the name
self.scan_digits(base);
let encoded_name : u32 = self.with_str_from(start_bpos, |s| {
num::from_str_radix(s, 10).unwrap_or_else(|| {
fail!("expected digits representing a name, got `{}`, {}, range [{},{}]",
s, where, start_bpos, self.last_pos);
})
});
// skip over the `,`
bump_expecting_char(self, ',', "comma", where);
// skip over the "ctxt_"
for c in "ctxt_".chars() {
bump_expecting_char(self, c, c, where);
}
// find the integer representing the ctxt
let start_bpos = self.last_pos;
self.scan_digits(base);
let encoded_ctxt : ast::SyntaxContext = self.with_str_from(start_bpos, |s| {
num::from_str_radix(s, 10).unwrap_or_else(|| {
fail!("expected digits representing a ctxt, got `{}`, {}", s, where);
})
});
// skip over the `\x00`
bump_expecting_char(self, '\x00', "nul-byte", where);
ast::Ident { name: ast::Name(encoded_name),
ctxt: encoded_ctxt, }
}
/// Scan through any digits (base `radix`) or underscores, and return how
/// many digits there were.
fn scan_digits(&mut self, radix: uint) -> uint {
@ -839,6 +933,17 @@ impl<'a> StringReader<'a> {
return self.scan_number(c.unwrap());
}
if self.read_embedded_ident {
match (c.unwrap(), self.nextch(), self.nextnextch()) {
('\x00', Some('n'), Some('a')) => {
let ast_ident = self.scan_embedded_hygienic_ident();
let is_mod_name = self.curr_is(':') && self.nextch_is(':');
return token::IDENT(ast_ident, is_mod_name);
}
_ => {}
}
}
match c.expect("next_token_inner called at EOF") {
// One-byte tokens.
';' => { self.bump(); return token::SEMI; }

View file

@ -144,6 +144,8 @@ pub fn parse_stmt_from_source_str(name: String,
maybe_aborted(p.parse_stmt(attrs),p)
}
// Note: keep in sync with `with_hygiene::parse_tts_from_source_str`
// until #16472 is resolved.
pub fn parse_tts_from_source_str(name: String,
source: String,
cfg: ast::CrateConfig,
@ -160,6 +162,8 @@ pub fn parse_tts_from_source_str(name: String,
maybe_aborted(p.parse_all_token_trees(),p)
}
// Note: keep in sync with `with_hygiene::new_parser_from_source_str`
// until #16472 is resolved.
// Create a new parser from a source string
pub fn new_parser_from_source_str<'a>(sess: &'a ParseSess,
cfg: ast::CrateConfig,
@ -192,6 +196,8 @@ pub fn new_sub_parser_from_file<'a>(sess: &'a ParseSess,
p
}
// Note: keep this in sync with `with_hygiene::filemap_to_parser` until
// #16472 is resolved.
/// Given a filemap and config, return a parser
pub fn filemap_to_parser<'a>(sess: &'a ParseSess,
filemap: Rc<FileMap>,
@ -248,6 +254,8 @@ pub fn string_to_filemap(sess: &ParseSess, source: String, path: String)
sess.span_diagnostic.cm.new_filemap(path, source)
}
// Note: keep this in sync with `with_hygiene::filemap_to_tts` (apart
// from the StringReader constructor), until #16472 is resolved.
/// Given a filemap, produce a sequence of token-trees
pub fn filemap_to_tts(sess: &ParseSess, filemap: Rc<FileMap>)
-> Vec<ast::TokenTree> {
@ -267,6 +275,67 @@ pub fn tts_to_parser<'a>(sess: &'a ParseSess,
Parser::new(sess, cfg, box trdr)
}
// FIXME (Issue #16472): The `with_hygiene` mod should go away after
// ToToken impls are revised to go directly to token-trees.
/// Copies of the parent module's source-string parsing entry points
/// whose lexer is built with `make_reader_with_embedded_idents`.
pub mod with_hygiene {
use ast;
use codemap::FileMap;
use parse::parser::Parser;
use std::rc::Rc;
use super::ParseSess;
use super::{maybe_aborted, string_to_filemap, tts_to_parser};
// Note: keep this in sync with `super::parse_tts_from_source_str` until
// #16472 is resolved.
/// Parses `source` (labelled `name`) into token trees, with the
/// parser's `quote_depth` incremented by one.
pub fn parse_tts_from_source_str(name: String,
source: String,
cfg: ast::CrateConfig,
sess: &ParseSess) -> Vec<ast::TokenTree> {
let mut p = new_parser_from_source_str(
sess,
cfg,
name,
source
);
p.quote_depth += 1u;
// right now this is re-creating the token trees from ... token trees.
maybe_aborted(p.parse_all_token_trees(),p)
}
// Note: keep this in sync with `super::new_parser_from_source_str` until
// #16472 is resolved.
// Create a new parser from a source string
fn new_parser_from_source_str<'a>(sess: &'a ParseSess,
cfg: ast::CrateConfig,
name: String,
source: String) -> Parser<'a> {
filemap_to_parser(sess, string_to_filemap(sess, source, name), cfg)
}
// Note: keep this in sync with `super::filemap_to_parser` until
// #16472 is resolved.
/// Given a filemap and config, return a parser
fn filemap_to_parser<'a>(sess: &'a ParseSess,
filemap: Rc<FileMap>,
cfg: ast::CrateConfig) -> Parser<'a> {
tts_to_parser(sess, filemap_to_tts(sess, filemap), cfg)
}
// Note: keep this in sync with `super::filemap_to_tts` until
// #16472 is resolved.
/// Given a filemap, produce a sequence of token-trees
fn filemap_to_tts(sess: &ParseSess, filemap: Rc<FileMap>)
-> Vec<ast::TokenTree> {
// it appears to me that the cfg doesn't matter here... indeed,
// parsing tt's probably shouldn't require a parser at all.
// The reader constructor is the one intended difference from
// `super::filemap_to_tts`.
use make_reader = super::lexer::make_reader_with_embedded_idents;
let cfg = Vec::new();
let srdr = make_reader(&sess.span_diagnostic, filemap);
let mut p1 = Parser::new(sess, cfg, box srdr);
p1.parse_all_token_trees()
}
}
/// Abort if necessary
pub fn maybe_aborted<T>(result: T, mut p: Parser) -> T {
p.abort_if_errors();

View file

@ -58,7 +58,8 @@ pub struct State<'a> {
literals: Option<Vec<comments::Literal> >,
cur_cmnt_and_lit: CurrentCommentAndLiteral,
boxes: Vec<pp::Breaks>,
ann: &'a PpAnn
ann: &'a PpAnn,
encode_idents_with_hygiene: bool,
}
pub fn rust_printer(writer: Box<io::Writer>) -> State<'static> {
@ -78,7 +79,8 @@ pub fn rust_printer_annotated<'a>(writer: Box<io::Writer>,
cur_lit: 0
},
boxes: Vec::new(),
ann: ann
ann: ann,
encode_idents_with_hygiene: false,
}
}
@ -148,7 +150,8 @@ impl<'a> State<'a> {
cur_lit: 0
},
boxes: Vec::new(),
ann: ann
ann: ann,
encode_idents_with_hygiene: false,
}
}
}
@ -169,70 +172,77 @@ pub fn to_string(f: |&mut State| -> IoResult<()>) -> String {
}
}
// FIXME (Issue #16472): the thing_to_string_impls macro should go away
// after we revise the syntax::ext::quote::ToToken impls to go directly
// to token-trees instead of thing -> string -> token-trees.
macro_rules! thing_to_string_impls {
($to_string:ident) => {
pub fn ty_to_string(ty: &ast::Ty) -> String {
to_string(|s| s.print_type(ty))
$to_string(|s| s.print_type(ty))
}
pub fn pat_to_string(pat: &ast::Pat) -> String {
to_string(|s| s.print_pat(pat))
$to_string(|s| s.print_pat(pat))
}
pub fn arm_to_string(arm: &ast::Arm) -> String {
to_string(|s| s.print_arm(arm))
$to_string(|s| s.print_arm(arm))
}
pub fn expr_to_string(e: &ast::Expr) -> String {
to_string(|s| s.print_expr(e))
$to_string(|s| s.print_expr(e))
}
pub fn lifetime_to_string(e: &ast::Lifetime) -> String {
to_string(|s| s.print_lifetime(e))
$to_string(|s| s.print_lifetime(e))
}
pub fn tt_to_string(tt: &ast::TokenTree) -> String {
to_string(|s| s.print_tt(tt))
$to_string(|s| s.print_tt(tt))
}
pub fn tts_to_string(tts: &[ast::TokenTree]) -> String {
to_string(|s| s.print_tts(tts))
$to_string(|s| s.print_tts(tts))
}
pub fn stmt_to_string(stmt: &ast::Stmt) -> String {
to_string(|s| s.print_stmt(stmt))
$to_string(|s| s.print_stmt(stmt))
}
pub fn item_to_string(i: &ast::Item) -> String {
to_string(|s| s.print_item(i))
$to_string(|s| s.print_item(i))
}
pub fn generics_to_string(generics: &ast::Generics) -> String {
to_string(|s| s.print_generics(generics))
$to_string(|s| s.print_generics(generics))
}
pub fn ty_method_to_string(p: &ast::TypeMethod) -> String {
to_string(|s| s.print_ty_method(p))
$to_string(|s| s.print_ty_method(p))
}
pub fn method_to_string(p: &ast::Method) -> String {
to_string(|s| s.print_method(p))
$to_string(|s| s.print_method(p))
}
pub fn fn_block_to_string(p: &ast::FnDecl) -> String {
to_string(|s| s.print_fn_block_args(p, false))
$to_string(|s| s.print_fn_block_args(p, false))
}
pub fn path_to_string(p: &ast::Path) -> String {
to_string(|s| s.print_path(p, false))
$to_string(|s| s.print_path(p, false))
}
pub fn ident_to_string(id: &ast::Ident) -> String {
to_string(|s| s.print_ident(*id))
$to_string(|s| s.print_ident(*id))
}
pub fn fun_to_string(decl: &ast::FnDecl, fn_style: ast::FnStyle, name: ast::Ident,
opt_explicit_self: Option<ast::ExplicitSelf_>,
generics: &ast::Generics) -> String {
to_string(|s| {
$to_string(|s| {
try!(s.print_fn(decl, Some(fn_style), abi::Rust,
name, generics, opt_explicit_self, ast::Inherited));
try!(s.end()); // Close the head box
@ -241,7 +251,7 @@ pub fn fun_to_string(decl: &ast::FnDecl, fn_style: ast::FnStyle, name: ast::Iden
}
pub fn block_to_string(blk: &ast::Block) -> String {
to_string(|s| {
$to_string(|s| {
// containing cbox, will be closed by print-block at }
try!(s.cbox(indent_unit));
// head-ibox, will be closed by print-block after {
@ -251,31 +261,57 @@ pub fn block_to_string(blk: &ast::Block) -> String {
}
pub fn meta_item_to_string(mi: &ast::MetaItem) -> String {
to_string(|s| s.print_meta_item(mi))
$to_string(|s| s.print_meta_item(mi))
}
pub fn attribute_to_string(attr: &ast::Attribute) -> String {
to_string(|s| s.print_attribute(attr))
$to_string(|s| s.print_attribute(attr))
}
pub fn lit_to_string(l: &ast::Lit) -> String {
to_string(|s| s.print_literal(l))
$to_string(|s| s.print_literal(l))
}
pub fn explicit_self_to_string(explicit_self: ast::ExplicitSelf_) -> String {
to_string(|s| s.print_explicit_self(explicit_self, ast::MutImmutable).map(|_| {}))
$to_string(|s| s.print_explicit_self(explicit_self, ast::MutImmutable).map(|_| {}))
}
pub fn variant_to_string(var: &ast::Variant) -> String {
to_string(|s| s.print_variant(var))
$to_string(|s| s.print_variant(var))
}
pub fn arg_to_string(arg: &ast::Arg) -> String {
to_string(|s| s.print_arg(arg))
$to_string(|s| s.print_arg(arg))
}
pub fn mac_to_string(arg: &ast::Mac) -> String {
to_string(|s| s.print_mac(arg))
$to_string(|s| s.print_mac(arg))
}
} }
thing_to_string_impls!(to_string)
// FIXME (Issue #16472): the whole `with_hygiene` mod should go away
// after we revise the syntax::ext::quote::ToToken impls to go directly
// to token-trees instead of thing -> string -> token-trees.
pub mod with_hygiene {
use abi;
use ast;
use std::io::IoResult;
use super::indent_unit;
// This function is the trick that all the rest of the routines
// hang on: it runs `f` through `super::to_string`, but sets
// `encode_idents_with_hygiene` on the printer state first.
pub fn to_string_hyg(f: |&mut super::State| -> IoResult<()>) -> String {
super::to_string(|s| {
s.encode_idents_with_hygiene = true;
f(s)
})
}
// Instantiate the `*_to_string` family on top of `to_string_hyg`.
thing_to_string_impls!(to_string_hyg)
}
pub fn visibility_qualified(vis: ast::Visibility, s: &str) -> String {
@ -1672,8 +1708,13 @@ impl<'a> State<'a> {
}
/// Writes `ident` to the printer as a single word. When
/// `encode_idents_with_hygiene` is set, the word is the result of
/// `encode_with_hygiene`; otherwise it is the identifier's interned
/// string.
pub fn print_ident(&mut self, ident: ast::Ident) -> IoResult<()> {
    if !self.encode_idents_with_hygiene {
        return word(&mut self.s, token::get_ident(ident).get());
    }
    let hygienic = ident.encode_with_hygiene();
    word(&mut self.s, hygienic.as_slice())
}
pub fn print_name(&mut self, name: ast::Name) -> IoResult<()> {
word(&mut self.s, token::get_name(name).get())