auto merge of #14423 : Kimundi/rust/static_regex, r=alexcrichton
This patch changes the internals of `Regex` and `regex!()` so that `static RE: Regex = regex!(...);` is valid. It doesn't change anything about the actual regex implementation; it only changes the type to something that can be constructed as a const expression.
This commit is contained in:
commit
db2ddb1bba
5 changed files with 126 additions and 45 deletions
|
@ -401,7 +401,7 @@ pub mod native {
|
||||||
// undesirable consequences (such as requiring a dependency on
|
// undesirable consequences (such as requiring a dependency on
|
||||||
// `libsyntax`).
|
// `libsyntax`).
|
||||||
//
|
//
|
||||||
// Secondly, the code generated generated by `regex!` must *also* be able
|
// Secondly, the code generated by `regex!` must *also* be able
|
||||||
// to access various functions in this crate to reduce code duplication
|
// to access various functions in this crate to reduce code duplication
|
||||||
// and to provide a value with precisely the same `Regex` type in this
|
// and to provide a value with precisely the same `Regex` type in this
|
||||||
// crate. This, AFAIK, is impossible to mitigate.
|
// crate. This, AFAIK, is impossible to mitigate.
|
||||||
|
|
|
@ -100,38 +100,45 @@ pub fn is_match(regex: &str, text: &str) -> Result<bool, parse::Error> {
|
||||||
/// documentation.
|
/// documentation.
|
||||||
#[deriving(Clone)]
|
#[deriving(Clone)]
|
||||||
#[allow(visible_private_types)]
|
#[allow(visible_private_types)]
|
||||||
pub struct Regex {
|
pub enum Regex {
|
||||||
/// The representation of `Regex` is exported to support the `regex!`
|
// The representation of `Regex` is exported to support the `regex!`
|
||||||
/// syntax extension. Do not rely on it.
|
// syntax extension. Do not rely on it.
|
||||||
///
|
//
|
||||||
/// See the comments for the `program` module in `lib.rs` for a more
|
// See the comments for the `program` module in `lib.rs` for a more
|
||||||
/// detailed explanation for what `regex!` requires.
|
// detailed explanation for what `regex!` requires.
|
||||||
#[doc(hidden)]
|
#[doc(hidden)]
|
||||||
pub original: String,
|
Dynamic(Dynamic),
|
||||||
#[doc(hidden)]
|
#[doc(hidden)]
|
||||||
pub names: Vec<Option<String>>,
|
Native(Native),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[deriving(Clone)]
|
||||||
|
#[doc(hidden)]
|
||||||
|
pub struct Dynamic {
|
||||||
|
original: String,
|
||||||
|
names: Vec<Option<String>>,
|
||||||
#[doc(hidden)]
|
#[doc(hidden)]
|
||||||
pub p: MaybeNative,
|
pub prog: Program
|
||||||
|
}
|
||||||
|
|
||||||
|
#[doc(hidden)]
|
||||||
|
pub struct Native {
|
||||||
|
#[doc(hidden)]
|
||||||
|
pub original: &'static str,
|
||||||
|
#[doc(hidden)]
|
||||||
|
pub names: &'static [Option<&'static str>],
|
||||||
|
#[doc(hidden)]
|
||||||
|
pub prog: fn(MatchKind, &str, uint, uint) -> Vec<Option<uint>>
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Clone for Native {
|
||||||
|
fn clone(&self) -> Native { *self }
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Show for Regex {
|
impl fmt::Show for Regex {
|
||||||
/// Shows the original regular expression.
|
/// Shows the original regular expression.
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
write!(f, "{}", self.original)
|
write!(f, "{}", self.as_str())
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub enum MaybeNative {
|
|
||||||
Dynamic(Program),
|
|
||||||
Native(fn(MatchKind, &str, uint, uint) -> Vec<Option<uint>>),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Clone for MaybeNative {
|
|
||||||
fn clone(&self) -> MaybeNative {
|
|
||||||
match *self {
|
|
||||||
Dynamic(ref p) => Dynamic(p.clone()),
|
|
||||||
Native(fp) => Native(fp),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -146,10 +153,11 @@ impl Regex {
|
||||||
pub fn new(re: &str) -> Result<Regex, parse::Error> {
|
pub fn new(re: &str) -> Result<Regex, parse::Error> {
|
||||||
let ast = try!(parse::parse(re));
|
let ast = try!(parse::parse(re));
|
||||||
let (prog, names) = Program::new(ast);
|
let (prog, names) = Program::new(ast);
|
||||||
Ok(Regex {
|
Ok(Dynamic(Dynamic {
|
||||||
original: re.to_strbuf(),
|
original: re.to_strbuf(),
|
||||||
names: names, p: Dynamic(prog),
|
names: names,
|
||||||
})
|
prog: prog,
|
||||||
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns true if and only if the regex matches the string given.
|
/// Returns true if and only if the regex matches the string given.
|
||||||
|
@ -495,6 +503,46 @@ impl Regex {
|
||||||
}
|
}
|
||||||
new.append(text.slice(last_match, text.len()))
|
new.append(text.slice(last_match, text.len()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the original string of this regex.
|
||||||
|
pub fn as_str<'a>(&'a self) -> &'a str {
|
||||||
|
match *self {
|
||||||
|
Dynamic(Dynamic { ref original, .. }) => original.as_slice(),
|
||||||
|
Native(Native { ref original, .. }) => original.as_slice(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[doc(hidden)]
|
||||||
|
#[allow(visible_private_types)]
|
||||||
|
#[experimental]
|
||||||
|
pub fn names_iter<'a>(&'a self) -> NamesIter<'a> {
|
||||||
|
match *self {
|
||||||
|
Native(ref n) => NamesIterNative(n.names.iter()),
|
||||||
|
Dynamic(ref d) => NamesIterDynamic(d.names.iter())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn names_len(&self) -> uint {
|
||||||
|
match *self {
|
||||||
|
Native(ref n) => n.names.len(),
|
||||||
|
Dynamic(ref d) => d.names.len()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
enum NamesIter<'a> {
|
||||||
|
NamesIterNative(::std::slice::Items<'a, Option<&'static str>>),
|
||||||
|
NamesIterDynamic(::std::slice::Items<'a, Option<String>>)
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Iterator<Option<String>> for NamesIter<'a> {
|
||||||
|
fn next(&mut self) -> Option<Option<String>> {
|
||||||
|
match *self {
|
||||||
|
NamesIterNative(ref mut i) => i.next().map(|x| x.map(|s| s.to_strbuf())),
|
||||||
|
NamesIterDynamic(ref mut i) => i.next().map(|x| x.as_ref().map(|s| s.to_strbuf())),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// NoExpand indicates literal string replacement.
|
/// NoExpand indicates literal string replacement.
|
||||||
|
@ -612,6 +660,7 @@ pub struct Captures<'t> {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'t> Captures<'t> {
|
impl<'t> Captures<'t> {
|
||||||
|
#[allow(experimental)]
|
||||||
fn new(re: &Regex, search: &'t str, locs: CaptureLocs)
|
fn new(re: &Regex, search: &'t str, locs: CaptureLocs)
|
||||||
-> Option<Captures<'t>> {
|
-> Option<Captures<'t>> {
|
||||||
if !has_match(&locs) {
|
if !has_match(&locs) {
|
||||||
|
@ -619,15 +668,15 @@ impl<'t> Captures<'t> {
|
||||||
}
|
}
|
||||||
|
|
||||||
let named =
|
let named =
|
||||||
if re.names.len() == 0 {
|
if re.names_len() == 0 {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
let mut named = HashMap::new();
|
let mut named = HashMap::new();
|
||||||
for (i, name) in re.names.iter().enumerate() {
|
for (i, name) in re.names_iter().enumerate() {
|
||||||
match name {
|
match name {
|
||||||
&None => {},
|
None => {},
|
||||||
&Some(ref name) => {
|
Some(name) => {
|
||||||
named.insert(name.to_strbuf(), i);
|
named.insert(name, i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -862,9 +911,9 @@ fn exec(re: &Regex, which: MatchKind, input: &str) -> CaptureLocs {
|
||||||
|
|
||||||
fn exec_slice(re: &Regex, which: MatchKind,
|
fn exec_slice(re: &Regex, which: MatchKind,
|
||||||
input: &str, s: uint, e: uint) -> CaptureLocs {
|
input: &str, s: uint, e: uint) -> CaptureLocs {
|
||||||
match re.p {
|
match *re {
|
||||||
Dynamic(ref prog) => vm::run(which, prog, input, s, e),
|
Dynamic(Dynamic { ref prog, .. }) => vm::run(which, prog, input, s, e),
|
||||||
Native(exec) => exec(which, input, s, e),
|
Native(Native { prog, .. }) => prog(which, input, s, e),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,6 +20,9 @@ mod native_bench;
|
||||||
#[path = "tests.rs"]
|
#[path = "tests.rs"]
|
||||||
mod native_tests;
|
mod native_tests;
|
||||||
|
|
||||||
|
#[cfg(not(stage1))]
|
||||||
|
mod native_static;
|
||||||
|
|
||||||
// Due to macro scoping rules, this definition only applies for the modules
|
// Due to macro scoping rules, this definition only applies for the modules
|
||||||
// defined below. Effectively, it allows us to use the same tests for both
|
// defined below. Effectively, it allows us to use the same tests for both
|
||||||
// native and dynamic regexes.
|
// native and dynamic regexes.
|
||||||
|
|
26
src/libregex/test/native_static.rs
Normal file
26
src/libregex/test/native_static.rs
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||||
|
// file at the top-level directory of this distribution and at
|
||||||
|
// http://rust-lang.org/COPYRIGHT.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||||
|
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||||
|
// option. This file may not be copied, modified, or distributed
|
||||||
|
// except according to those terms.
|
||||||
|
|
||||||
|
use regex::Regex;
|
||||||
|
static RE: Regex = regex!(r"\d+");
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn static_splitn() {
|
||||||
|
let text = "cauchy123plato456tyler789binx";
|
||||||
|
let subs: Vec<&str> = RE.splitn(text, 2).collect();
|
||||||
|
assert_eq!(subs, vec!("cauchy", "plato456tyler789binx"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn static_split() {
|
||||||
|
let text = "cauchy123plato456tyler789binx";
|
||||||
|
let subs: Vec<&str> = RE.split(text).collect();
|
||||||
|
assert_eq!(subs, vec!("cauchy", "plato", "tyler", "binx"));
|
||||||
|
}
|
|
@ -75,6 +75,7 @@ pub fn macro_registrar(register: |ast::Name, SyntaxExtension|) {
|
||||||
/// It is strongly recommended to read the dynamic implementation in vm.rs
|
/// It is strongly recommended to read the dynamic implementation in vm.rs
|
||||||
/// first before trying to understand the code generator. The implementation
|
/// first before trying to understand the code generator. The implementation
|
||||||
/// strategy is identical and vm.rs has comments and will be easier to follow.
|
/// strategy is identical and vm.rs has comments and will be easier to follow.
|
||||||
|
#[allow(experimental)]
|
||||||
fn native(cx: &mut ExtCtxt, sp: codemap::Span, tts: &[ast::TokenTree])
|
fn native(cx: &mut ExtCtxt, sp: codemap::Span, tts: &[ast::TokenTree])
|
||||||
-> Box<MacResult> {
|
-> Box<MacResult> {
|
||||||
let regex = match parse(cx, tts) {
|
let regex = match parse(cx, tts) {
|
||||||
|
@ -89,14 +90,14 @@ fn native(cx: &mut ExtCtxt, sp: codemap::Span, tts: &[ast::TokenTree])
|
||||||
return DummyResult::any(sp)
|
return DummyResult::any(sp)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let prog = match re.p {
|
let prog = match re {
|
||||||
Dynamic(ref prog) => prog.clone(),
|
Dynamic(Dynamic { ref prog, .. }) => prog.clone(),
|
||||||
Native(_) => unreachable!(),
|
Native(_) => unreachable!(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut gen = NfaGen {
|
let mut gen = NfaGen {
|
||||||
cx: &*cx, sp: sp, prog: prog,
|
cx: &*cx, sp: sp, prog: prog,
|
||||||
names: re.names.clone(), original: re.original.clone(),
|
names: re.names_iter().collect(), original: re.as_str().to_strbuf(),
|
||||||
};
|
};
|
||||||
MacExpr::new(gen.code())
|
MacExpr::new(gen.code())
|
||||||
}
|
}
|
||||||
|
@ -119,7 +120,7 @@ impl<'a> NfaGen<'a> {
|
||||||
|cx, name| match *name {
|
|cx, name| match *name {
|
||||||
Some(ref name) => {
|
Some(ref name) => {
|
||||||
let name = name.as_slice();
|
let name = name.as_slice();
|
||||||
quote_expr!(cx, Some($name.to_strbuf()))
|
quote_expr!(cx, Some($name))
|
||||||
}
|
}
|
||||||
None => cx.expr_none(self.sp),
|
None => cx.expr_none(self.sp),
|
||||||
}
|
}
|
||||||
|
@ -141,9 +142,11 @@ impl<'a> NfaGen<'a> {
|
||||||
let regex = self.original.as_slice();
|
let regex = self.original.as_slice();
|
||||||
|
|
||||||
quote_expr!(self.cx, {
|
quote_expr!(self.cx, {
|
||||||
|
static CAP_NAMES: &'static [Option<&'static str>] = &$cap_names;
|
||||||
fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
|
fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
|
||||||
start: uint, end: uint) -> Vec<Option<uint>> {
|
start: uint, end: uint) -> Vec<Option<uint>> {
|
||||||
#![allow(unused_imports)]
|
#![allow(unused_imports)]
|
||||||
|
#![allow(unused_mut)]
|
||||||
use regex::native::{
|
use regex::native::{
|
||||||
MatchKind, Exists, Location, Submatches,
|
MatchKind, Exists, Location, Submatches,
|
||||||
StepState, StepMatchEarlyReturn, StepMatch, StepContinue,
|
StepState, StepMatchEarlyReturn, StepMatch, StepContinue,
|
||||||
|
@ -310,11 +313,11 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
::regex::Regex {
|
::regex::native::Native(::regex::native::Native {
|
||||||
original: $regex.to_strbuf(),
|
original: $regex,
|
||||||
names: vec!$cap_names,
|
names: CAP_NAMES,
|
||||||
p: ::regex::native::Native(exec),
|
prog: exec,
|
||||||
}
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue