use std::cell::RefCell;
use std::mem;
use regex_syntax::hir::{self, Hir, HirKind};
use regex_syntax::utf8::{Utf8Range, Utf8Sequences};
use classes::ByteClassSet;
use error::{Error, Result};
use nfa::map::{Utf8BoundedMap, Utf8SuffixKey, Utf8SuffixMap};
use nfa::range_trie::RangeTrie;
use nfa::{State, StateID, Transition, NFA};
#[derive(Clone, Copy, Debug)]
struct Config {
anchored: bool,
allow_invalid_utf8: bool,
reverse: bool,
shrink: bool,
}
impl Default for Config {
fn default() -> Config {
Config {
anchored: false,
allow_invalid_utf8: false,
reverse: false,
shrink: true,
}
}
}
#[derive(Clone, Debug)]
pub struct Builder {
config: Config,
}
impl Builder {
pub fn new() -> Builder {
Builder { config: Config::default() }
}
pub fn build(&self, expr: &Hir) -> Result<NFA> {
let mut nfa = NFA::always_match();
self.build_with(&mut Compiler::new(), &mut nfa, expr)?;
Ok(nfa)
}
pub fn build_with(
&self,
compiler: &mut Compiler,
nfa: &mut NFA,
expr: &Hir,
) -> Result<()> {
compiler.clear();
compiler.configure(self.config);
compiler.compile(nfa, expr)
}
pub fn anchored(&mut self, yes: bool) -> &mut Builder {
self.config.anchored = yes;
self
}
pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut Builder {
self.config.allow_invalid_utf8 = yes;
self
}
pub fn reverse(&mut self, yes: bool) -> &mut Builder {
self.config.reverse = yes;
self
}
pub fn shrink(&mut self, yes: bool) -> &mut Builder {
self.config.shrink = yes;
self
}
}
#[derive(Clone, Debug)]
pub struct Compiler {
states: RefCell<Vec<CState>>,
config: Config,
utf8_state: RefCell<Utf8State>,
trie_state: RefCell<RangeTrie>,
utf8_suffix: RefCell<Utf8SuffixMap>,
remap: RefCell<Vec<StateID>>,
empties: RefCell<Vec<(StateID, StateID)>>,
}
#[derive(Clone, Debug, Eq, PartialEq)]
enum CState {
Empty { next: StateID },
Range { range: Transition },
Sparse { ranges: Vec<Transition> },
Union { alternates: Vec<StateID> },
UnionReverse { alternates: Vec<StateID> },
Match,
}
#[derive(Clone, Copy, Debug)]
pub struct ThompsonRef {
start: StateID,
end: StateID,
}
impl Compiler {
pub fn new() -> Compiler {
Compiler {
states: RefCell::new(vec![]),
config: Config::default(),
utf8_state: RefCell::new(Utf8State::new()),
trie_state: RefCell::new(RangeTrie::new()),
utf8_suffix: RefCell::new(Utf8SuffixMap::new(1000)),
remap: RefCell::new(vec![]),
empties: RefCell::new(vec![]),
}
}
fn clear(&self) {
self.states.borrow_mut().clear();
}
fn configure(&mut self, config: Config) {
self.config = config;
}
fn compile(&self, nfa: &mut NFA, expr: &Hir) -> Result<()> {
nfa.anchored = self.config.anchored;
let mut start = self.add_empty();
if !nfa.anchored {
let compiled = if self.config.allow_invalid_utf8 {
self.c_unanchored_prefix_invalid_utf8()?
} else {
self.c_unanchored_prefix_valid_utf8()?
};
self.patch(start, compiled.start);
start = compiled.end;
}
let compiled = self.c(&expr)?;
let match_id = self.add_match();
self.patch(start, compiled.start);
self.patch(compiled.end, match_id);
self.finish(nfa);
Ok(())
}
fn finish(&self, nfa: &mut NFA) {
let mut bstates = self.states.borrow_mut();
let mut remap = self.remap.borrow_mut();
remap.resize(bstates.len(), 0);
let mut empties = self.empties.borrow_mut();
empties.clear();
nfa.states.clear();
let mut byteset = ByteClassSet::new();
for (id, bstate) in bstates.iter_mut().enumerate() {
match *bstate {
CState::Empty { next } => {
empties.push((id, next));
}
CState::Range { ref range } => {
remap[id] = nfa.states.len();
byteset.set_range(range.start, range.end);
nfa.states.push(State::Range { range: range.clone() });
}
CState::Sparse { ref mut ranges } => {
remap[id] = nfa.states.len();
let ranges = mem::replace(ranges, vec![]);
for r in &ranges {
byteset.set_range(r.start, r.end);
}
nfa.states.push(State::Sparse {
ranges: ranges.into_boxed_slice(),
});
}
CState::Union { ref mut alternates } => {
remap[id] = nfa.states.len();
let alternates = mem::replace(alternates, vec![]);
nfa.states.push(State::Union {
alternates: alternates.into_boxed_slice(),
});
}
CState::UnionReverse { ref mut alternates } => {
remap[id] = nfa.states.len();
let mut alternates = mem::replace(alternates, vec![]);
alternates.reverse();
nfa.states.push(State::Union {
alternates: alternates.into_boxed_slice(),
});
}
CState::Match => {
remap[id] = nfa.states.len();
nfa.states.push(State::Match);
}
}
}
for &(empty_id, mut empty_next) in empties.iter() {
while let CState::Empty { next } = bstates[empty_next] {
empty_next = next;
}
remap[empty_id] = remap[empty_next];
}
for state in &mut nfa.states {
state.remap(&remap);
}
nfa.start = remap[0];
nfa.byte_classes = byteset.byte_classes();
}
fn c(&self, expr: &Hir) -> Result<ThompsonRef> {
match *expr.kind() {
HirKind::Empty => {
let id = self.add_empty();
Ok(ThompsonRef { start: id, end: id })
}
HirKind::Literal(hir::Literal::Unicode(ch)) => {
let mut buf = [0; 4];
let it = ch
.encode_utf8(&mut buf)
.as_bytes()
.iter()
.map(|&b| Ok(self.c_range(b, b)));
self.c_concat(it)
}
HirKind::Literal(hir::Literal::Byte(b)) => Ok(self.c_range(b, b)),
HirKind::Class(hir::Class::Bytes(ref cls)) => {
self.c_byte_class(cls)
}
HirKind::Class(hir::Class::Unicode(ref cls)) => {
self.c_unicode_class(cls)
}
HirKind::Repetition(ref rep) => self.c_repetition(rep),
HirKind::Group(ref group) => self.c(&*group.hir),
HirKind::Concat(ref exprs) => {
self.c_concat(exprs.iter().map(|e| self.c(e)))
}
HirKind::Alternation(ref exprs) => {
self.c_alternation(exprs.iter().map(|e| self.c(e)))
}
HirKind::Anchor(_) => Err(Error::unsupported_anchor()),
HirKind::WordBoundary(_) => Err(Error::unsupported_word()),
}
}
fn c_concat<I>(&self, mut it: I) -> Result<ThompsonRef>
where
I: DoubleEndedIterator<Item = Result<ThompsonRef>>,
{
let first =
if self.config.reverse { it.next_back() } else { it.next() };
let ThompsonRef { start, mut end } = match first {
Some(result) => result?,
None => return Ok(self.c_empty()),
};
loop {
let next =
if self.config.reverse { it.next_back() } else { it.next() };
let compiled = match next {
Some(result) => result?,
None => break,
};
self.patch(end, compiled.start);
end = compiled.end;
}
Ok(ThompsonRef { start, end })
}
fn c_alternation<I>(&self, mut it: I) -> Result<ThompsonRef>
where
I: Iterator<Item = Result<ThompsonRef>>,
{
let first = it.next().expect("alternations must be non-empty")?;
let second = match it.next() {
None => return Ok(first),
Some(result) => result?,
};
let union = self.add_union();
let end = self.add_empty();
self.patch(union, first.start);
self.patch(first.end, end);
self.patch(union, second.start);
self.patch(second.end, end);
for result in it {
let compiled = result?;
self.patch(union, compiled.start);
self.patch(compiled.end, end);
}
Ok(ThompsonRef { start: union, end })
}
fn c_repetition(&self, rep: &hir::Repetition) -> Result<ThompsonRef> {
match rep.kind {
hir::RepetitionKind::ZeroOrOne => {
self.c_zero_or_one(&rep.hir, rep.greedy)
}
hir::RepetitionKind::ZeroOrMore => {
self.c_at_least(&rep.hir, rep.greedy, 0)
}
hir::RepetitionKind::OneOrMore => {
self.c_at_least(&rep.hir, rep.greedy, 1)
}
hir::RepetitionKind::Range(ref rng) => match *rng {
hir::RepetitionRange::Exactly(count) => {
self.c_exactly(&rep.hir, count)
}
hir::RepetitionRange::AtLeast(m) => {
self.c_at_least(&rep.hir, rep.greedy, m)
}
hir::RepetitionRange::Bounded(min, max) => {
self.c_bounded(&rep.hir, rep.greedy, min, max)
}
},
}
}
fn c_bounded(
&self,
expr: &Hir,
greedy: bool,
min: u32,
max: u32,
) -> Result<ThompsonRef> {
let prefix = self.c_exactly(expr, min)?;
if min == max {
return Ok(prefix);
}
let empty = self.add_empty();
let mut prev_end = prefix.end;
for _ in min..max {
let union = if greedy {
self.add_union()
} else {
self.add_reverse_union()
};
let compiled = self.c(expr)?;
self.patch(prev_end, union);
self.patch(union, compiled.start);
self.patch(union, empty);
prev_end = compiled.end;
}
self.patch(prev_end, empty);
Ok(ThompsonRef { start: prefix.start, end: empty })
}
fn c_at_least(
&self,
expr: &Hir,
greedy: bool,
n: u32,
) -> Result<ThompsonRef> {
if n == 0 {
let union = if greedy {
self.add_union()
} else {
self.add_reverse_union()
};
let compiled = self.c(expr)?;
self.patch(union, compiled.start);
self.patch(compiled.end, union);
Ok(ThompsonRef { start: union, end: union })
} else if n == 1 {
let compiled = self.c(expr)?;
let union = if greedy {
self.add_union()
} else {
self.add_reverse_union()
};
self.patch(compiled.end, union);
self.patch(union, compiled.start);
Ok(ThompsonRef { start: compiled.start, end: union })
} else {
let prefix = self.c_exactly(expr, n - 1)?;
let last = self.c(expr)?;
let union = if greedy {
self.add_union()
} else {
self.add_reverse_union()
};
self.patch(prefix.end, last.start);
self.patch(last.end, union);
self.patch(union, last.start);
Ok(ThompsonRef { start: prefix.start, end: union })
}
}
fn c_zero_or_one(&self, expr: &Hir, greedy: bool) -> Result<ThompsonRef> {
let union =
if greedy { self.add_union() } else { self.add_reverse_union() };
let compiled = self.c(expr)?;
let empty = self.add_empty();
self.patch(union, compiled.start);
self.patch(union, empty);
self.patch(compiled.end, empty);
Ok(ThompsonRef { start: union, end: empty })
}
fn c_exactly(&self, expr: &Hir, n: u32) -> Result<ThompsonRef> {
let it = (0..n).map(|_| self.c(expr));
self.c_concat(it)
}
fn c_byte_class(&self, cls: &hir::ClassBytes) -> Result<ThompsonRef> {
let end = self.add_empty();
let mut trans = Vec::with_capacity(cls.ranges().len());
for r in cls.iter() {
trans.push(Transition {
start: r.start(),
end: r.end(),
next: end,
});
}
Ok(ThompsonRef { start: self.add_sparse(trans), end })
}
fn c_unicode_class(&self, cls: &hir::ClassUnicode) -> Result<ThompsonRef> {
if cls.is_all_ascii() {
let end = self.add_empty();
let mut trans = Vec::with_capacity(cls.ranges().len());
for r in cls.iter() {
assert!(r.start() <= '\x7F');
assert!(r.end() <= '\x7F');
trans.push(Transition {
start: r.start() as u8,
end: r.end() as u8,
next: end,
});
}
Ok(ThompsonRef { start: self.add_sparse(trans), end })
} else if self.config.reverse {
if !self.config.shrink {
self.c_unicode_class_reverse_with_suffix(cls)
} else {
let mut trie = self.trie_state.borrow_mut();
trie.clear();
for rng in cls.iter() {
for mut seq in Utf8Sequences::new(rng.start(), rng.end()) {
seq.reverse();
trie.insert(seq.as_slice());
}
}
let mut utf8_state = self.utf8_state.borrow_mut();
let mut utf8c = Utf8Compiler::new(self, &mut *utf8_state);
trie.iter(|seq| {
utf8c.add(&seq);
});
Ok(utf8c.finish())
}
} else {
let mut utf8_state = self.utf8_state.borrow_mut();
let mut utf8c = Utf8Compiler::new(self, &mut *utf8_state);
for rng in cls.iter() {
for seq in Utf8Sequences::new(rng.start(), rng.end()) {
utf8c.add(seq.as_slice());
}
}
Ok(utf8c.finish())
}
}
fn c_unicode_class_reverse_with_suffix(
&self,
cls: &hir::ClassUnicode,
) -> Result<ThompsonRef> {
let mut cache = self.utf8_suffix.borrow_mut();
cache.clear();
let union = self.add_union();
let alt_end = self.add_empty();
for urng in cls.iter() {
for seq in Utf8Sequences::new(urng.start(), urng.end()) {
let mut end = alt_end;
for brng in seq.as_slice() {
let key = Utf8SuffixKey {
from: end,
start: brng.start,
end: brng.end,
};
let hash = cache.hash(&key);
if let Some(id) = cache.get(&key, hash) {
end = id;
continue;
}
let compiled = self.c_range(brng.start, brng.end);
self.patch(compiled.end, end);
end = compiled.start;
cache.set(key, hash, end);
}
self.patch(union, end);
}
}
Ok(ThompsonRef { start: union, end: alt_end })
}
fn c_range(&self, start: u8, end: u8) -> ThompsonRef {
let id = self.add_range(start, end);
ThompsonRef { start: id, end: id }
}
fn c_empty(&self) -> ThompsonRef {
let id = self.add_empty();
ThompsonRef { start: id, end: id }
}
fn c_unanchored_prefix_valid_utf8(&self) -> Result<ThompsonRef> {
self.c(&Hir::repetition(hir::Repetition {
kind: hir::RepetitionKind::ZeroOrMore,
greedy: false,
hir: Box::new(Hir::any(false)),
}))
}
fn c_unanchored_prefix_invalid_utf8(&self) -> Result<ThompsonRef> {
self.c(&Hir::repetition(hir::Repetition {
kind: hir::RepetitionKind::ZeroOrMore,
greedy: false,
hir: Box::new(Hir::any(true)),
}))
}
fn patch(&self, from: StateID, to: StateID) {
match self.states.borrow_mut()[from] {
CState::Empty { ref mut next } => {
*next = to;
}
CState::Range { ref mut range } => {
range.next = to;
}
CState::Sparse { .. } => {
panic!("cannot patch from a sparse NFA state")
}
CState::Union { ref mut alternates } => {
alternates.push(to);
}
CState::UnionReverse { ref mut alternates } => {
alternates.push(to);
}
CState::Match => {}
}
}
fn add_empty(&self) -> StateID {
let id = self.states.borrow().len();
self.states.borrow_mut().push(CState::Empty { next: 0 });
id
}
fn add_range(&self, start: u8, end: u8) -> StateID {
let id = self.states.borrow().len();
let trans = Transition { start, end, next: 0 };
let state = CState::Range { range: trans };
self.states.borrow_mut().push(state);
id
}
fn add_sparse(&self, ranges: Vec<Transition>) -> StateID {
if ranges.len() == 1 {
let id = self.states.borrow().len();
let state = CState::Range { range: ranges[0] };
self.states.borrow_mut().push(state);
return id;
}
let id = self.states.borrow().len();
let state = CState::Sparse { ranges };
self.states.borrow_mut().push(state);
id
}
fn add_union(&self) -> StateID {
let id = self.states.borrow().len();
let state = CState::Union { alternates: vec![] };
self.states.borrow_mut().push(state);
id
}
fn add_reverse_union(&self) -> StateID {
let id = self.states.borrow().len();
let state = CState::UnionReverse { alternates: vec![] };
self.states.borrow_mut().push(state);
id
}
fn add_match(&self) -> StateID {
let id = self.states.borrow().len();
self.states.borrow_mut().push(CState::Match);
id
}
}
#[derive(Debug)]
struct Utf8Compiler<'a> {
nfac: &'a Compiler,
state: &'a mut Utf8State,
target: StateID,
}
#[derive(Clone, Debug)]
struct Utf8State {
compiled: Utf8BoundedMap,
uncompiled: Vec<Utf8Node>,
}
#[derive(Clone, Debug)]
struct Utf8Node {
trans: Vec<Transition>,
last: Option<Utf8LastTransition>,
}
#[derive(Clone, Debug)]
struct Utf8LastTransition {
start: u8,
end: u8,
}
impl Utf8State {
fn new() -> Utf8State {
Utf8State { compiled: Utf8BoundedMap::new(5000), uncompiled: vec![] }
}
fn clear(&mut self) {
self.compiled.clear();
self.uncompiled.clear();
}
}
impl<'a> Utf8Compiler<'a> {
fn new(nfac: &'a Compiler, state: &'a mut Utf8State) -> Utf8Compiler<'a> {
let target = nfac.add_empty();
state.clear();
let mut utf8c = Utf8Compiler { nfac, state, target };
utf8c.add_empty();
utf8c
}
fn finish(&mut self) -> ThompsonRef {
self.compile_from(0);
let node = self.pop_root();
let start = self.compile(node);
ThompsonRef { start, end: self.target }
}
fn add(&mut self, ranges: &[Utf8Range]) {
let prefix_len = ranges
.iter()
.zip(&self.state.uncompiled)
.take_while(|&(range, node)| {
node.last.as_ref().map_or(false, |t| {
(t.start, t.end) == (range.start, range.end)
})
})
.count();
assert!(prefix_len < ranges.len());
self.compile_from(prefix_len);
self.add_suffix(&ranges[prefix_len..]);
}
fn compile_from(&mut self, from: usize) {
let mut next = self.target;
while from + 1 < self.state.uncompiled.len() {
let node = self.pop_freeze(next);
next = self.compile(node);
}
self.top_last_freeze(next);
}
fn compile(&mut self, node: Vec<Transition>) -> StateID {
let hash = self.state.compiled.hash(&node);
if let Some(id) = self.state.compiled.get(&node, hash) {
return id;
}
let id = self.nfac.add_sparse(node.clone());
self.state.compiled.set(node, hash, id);
id
}
fn add_suffix(&mut self, ranges: &[Utf8Range]) {
assert!(!ranges.is_empty());
let last = self
.state
.uncompiled
.len()
.checked_sub(1)
.expect("non-empty nodes");
assert!(self.state.uncompiled[last].last.is_none());
self.state.uncompiled[last].last = Some(Utf8LastTransition {
start: ranges[0].start,
end: ranges[0].end,
});
for r in &ranges[1..] {
self.state.uncompiled.push(Utf8Node {
trans: vec![],
last: Some(Utf8LastTransition { start: r.start, end: r.end }),
});
}
}
fn add_empty(&mut self) {
self.state.uncompiled.push(Utf8Node { trans: vec![], last: None });
}
fn pop_freeze(&mut self, next: StateID) -> Vec<Transition> {
let mut uncompiled = self.state.uncompiled.pop().unwrap();
uncompiled.set_last_transition(next);
uncompiled.trans
}
fn pop_root(&mut self) -> Vec<Transition> {
assert_eq!(self.state.uncompiled.len(), 1);
assert!(self.state.uncompiled[0].last.is_none());
self.state.uncompiled.pop().expect("non-empty nodes").trans
}
fn top_last_freeze(&mut self, next: StateID) {
let last = self
.state
.uncompiled
.len()
.checked_sub(1)
.expect("non-empty nodes");
self.state.uncompiled[last].set_last_transition(next);
}
}
impl Utf8Node {
fn set_last_transition(&mut self, next: StateID) {
if let Some(last) = self.last.take() {
self.trans.push(Transition {
start: last.start,
end: last.end,
next,
});
}
}
}
#[cfg(test)]
mod tests {
use regex_syntax::hir::Hir;
use regex_syntax::ParserBuilder;
use super::{Builder, State, StateID, Transition, NFA};
fn parse(pattern: &str) -> Hir {
ParserBuilder::new().build().parse(pattern).unwrap()
}
fn build(pattern: &str) -> NFA {
Builder::new().anchored(true).build(&parse(pattern)).unwrap()
}
fn s_byte(byte: u8, next: StateID) -> State {
let trans = Transition { start: byte, end: byte, next };
State::Range { range: trans }
}
fn s_range(start: u8, end: u8, next: StateID) -> State {
let trans = Transition { start, end, next };
State::Range { range: trans }
}
fn s_sparse(ranges: &[(u8, u8, StateID)]) -> State {
let ranges = ranges
.iter()
.map(|&(start, end, next)| Transition { start, end, next })
.collect();
State::Sparse { ranges }
}
fn s_union(alts: &[StateID]) -> State {
State::Union { alternates: alts.to_vec().into_boxed_slice() }
}
fn s_match() -> State {
State::Match
}
#[test]
fn errors() {
assert!(Builder::new().build(&parse(r"^")).is_err());
assert!(Builder::new().build(&parse(r"$")).is_err());
assert!(Builder::new().build(&parse(r"\A")).is_err());
assert!(Builder::new().build(&parse(r"\z")).is_err());
assert!(Builder::new().build(&parse(r"\b")).is_err());
assert!(Builder::new().build(&parse(r"\B")).is_err());
assert!(Builder::new().build(&parse(r"(?-u)\b")).is_err());
}
#[test]
fn compile_unanchored_prefix() {
let nfa = Builder::new().anchored(false).build(&parse(r"a")).unwrap();
assert_eq!(11, nfa.len());
assert_eq!(nfa.states[10], s_match());
assert_eq!(nfa.states[9], s_byte(b'a', 10));
let nfa = Builder::new()
.anchored(false)
.allow_invalid_utf8(true)
.build(&parse(r"a"))
.unwrap();
assert_eq!(
nfa.states,
&[
s_union(&[2, 1]),
s_range(0, 255, 0),
s_byte(b'a', 3),
s_match(),
]
);
}
#[test]
fn compile_empty() {
assert_eq!(build("").states, &[s_match(),]);
}
#[test]
fn compile_literal() {
assert_eq!(build("a").states, &[s_byte(b'a', 1), s_match(),]);
assert_eq!(
build("ab").states,
&[s_byte(b'a', 1), s_byte(b'b', 2), s_match(),]
);
assert_eq!(
build("☃").states,
&[s_byte(0xE2, 1), s_byte(0x98, 2), s_byte(0x83, 3), s_match(),]
);
let hir = ParserBuilder::new()
.allow_invalid_utf8(true)
.build()
.parse(r"(?-u)\xFF")
.unwrap();
let nfa = Builder::new()
.anchored(true)
.allow_invalid_utf8(true)
.build(&hir)
.unwrap();
assert_eq!(nfa.states, &[s_byte(b'\xFF', 1), s_match(),]);
}
#[test]
fn compile_class() {
assert_eq!(
build(r"[a-z]").states,
&[s_range(b'a', b'z', 1), s_match(),]
);
assert_eq!(
build(r"[x-za-c]").states,
&[s_sparse(&[(b'a', b'c', 1), (b'x', b'z', 1)]), s_match()]
);
assert_eq!(
build(r"[\u03B1-\u03B4]").states,
&[s_range(0xB1, 0xB4, 2), s_byte(0xCE, 0), s_match()]
);
assert_eq!(
build(r"[\u03B1-\u03B4\u{1F919}-\u{1F91E}]").states,
&[
s_range(0xB1, 0xB4, 5),
s_range(0x99, 0x9E, 5),
s_byte(0xA4, 1),
s_byte(0x9F, 2),
s_sparse(&[(0xCE, 0xCE, 0), (0xF0, 0xF0, 3)]),
s_match(),
]
);
assert_eq!(
build(r"[a-z☃]").states,
&[
s_byte(0x83, 3),
s_byte(0x98, 0),
s_sparse(&[(b'a', b'z', 3), (0xE2, 0xE2, 1)]),
s_match(),
]
);
}
#[test]
fn compile_repetition() {
assert_eq!(
build(r"a?").states,
&[s_union(&[1, 2]), s_byte(b'a', 2), s_match(),]
);
assert_eq!(
build(r"a??").states,
&[s_union(&[2, 1]), s_byte(b'a', 2), s_match(),]
);
}
#[test]
fn compile_group() {
assert_eq!(
build(r"ab+").states,
&[s_byte(b'a', 1), s_byte(b'b', 2), s_union(&[1, 3]), s_match(),]
);
assert_eq!(
build(r"(ab)").states,
&[s_byte(b'a', 1), s_byte(b'b', 2), s_match(),]
);
assert_eq!(
build(r"(ab)+").states,
&[s_byte(b'a', 1), s_byte(b'b', 2), s_union(&[0, 3]), s_match(),]
);
}
#[test]
fn compile_alternation() {
assert_eq!(
build(r"a|b").states,
&[s_byte(b'a', 3), s_byte(b'b', 3), s_union(&[0, 1]), s_match(),]
);
assert_eq!(
build(r"|b").states,
&[s_byte(b'b', 2), s_union(&[2, 0]), s_match(),]
);
assert_eq!(
build(r"a|").states,
&[s_byte(b'a', 2), s_union(&[0, 2]), s_match(),]
);
}
}