Fix #186.

BurntSushi · BurntSushi · commit 3fb34e24d21a · 2016-03-14T17:04:48.000-04:00
The problem here was that match instructions from each regex in a set
were being carried through all of the DFA states. This was intentional
so that when the DFA was done executing, we could look at the last state
the machine was in and figure out which match instructions were reached, and
therefore determine which expressions in the set matched.

This doesn't work when every regex in the set is anchored, because the
carried-over match instructions cause the DFA to continue creating
non-dead states, even when no other regex could possibly match.

To fix this, we are more selective about adding match instructions to
states. This will cause the match instructions to eventually disappear,
so we build up the matches in the set as they occur when producing cached
states. This is a little hokey, but since we're only interested in the
existence of a match, it works.
diff --git a/src/compile.rs b/src/compile.rs
@@ -998,7 +998,7 @@ impl ByteClassSet {
         // N.B. If you're debugging the DFA, it's useful to simply return
         // `(0..256).collect()`, which effectively removes the byte classes
         // and makes the transitions easier to read.
-        // return (0..256).collect();
+        // return (0..256).map(|b| b as u8).collect();
         let mut byte_classes = vec![0; 256];
         let mut class = 0u8;
         for i in 0..256 {
diff --git a/src/dfa.rs b/src/dfa.rs
@@ -46,7 +46,7 @@ use std::fmt;
 use std::mem;
 
 use exec::Search;
-use prog::{Inst, Program};
+use prog::Program;
 use sparse::SparseSet;
 
 /// The cache limit specifies approximately how much space we're willing to
@@ -465,6 +465,7 @@ impl<'a, 'b, 'c, 'm> Dfa<'a, 'b, 'c, 'm> {
             }
             self.at += 1;
         }
+        // println!("exiting loop at input {:?}", self.at);
         // Run the DFA once more on the special EOF senitnel value.
         si = match self.next_state(qcur, qnext, si, Byte::eof()) {
             None => return DfaResult::Quit,
@@ -481,13 +482,6 @@ impl<'a, 'b, 'c, 'm> Dfa<'a, 'b, 'c, 'm> {
             result = DfaResult::Match;
             self.search.set_end(Some(text.len()));
         }
-        if result.is_match() && !self.search.find_one_match() {
-            for &ip in &self.states[si as usize].insts {
-                if let Inst::Match(slot) = self.prog[ip as usize] {
-                    self.search.set_match(slot);
-                }
-            }
-        }
         result
     }
 
@@ -628,16 +622,16 @@ impl<'a, 'b, 'c, 'm> Dfa<'a, 'b, 'c, 'm> {
                 Char(_) | Ranges(_) => unreachable!(),
                 // These states are handled when following epsilon transitions.
                 Save(_) | Split(_) | EmptyLook(_) => {}
-                Match(_) => {
+                Match(slot) => {
                     is_match = true;
                     if !self.continue_past_first_match() {
                         break;
-                    } else if !self.search.find_one_match() {
+                    } else if !self.search.find_one_match()
+                            && !self.search.has_match(slot)
+                            && !qnext.contains_ip(ip as usize) {
                         // If we are continuing on to find other matches,
                         // then keep a record of the match states we've seen.
-                        if !qnext.contains_ip(ip as usize) {
-                            qnext.add(ip);
-                        }
+                        qnext.add(ip);
                     }
                 }
                 Bytes(ref inst) => {
@@ -876,8 +870,9 @@ impl<'a, 'b, 'c, 'm> Dfa<'a, 'b, 'c, 'm> {
                         | NotWordBoundaryAscii => unreachable!(),
                     }
                 }
-                Match(_) => {
+                Match(slot) => {
                     insts.push(ip);
+                    self.search.set_match(slot);
                     if !self.continue_past_first_match() {
                         break;
                     }
diff --git a/src/exec.rs b/src/exec.rs
@@ -76,6 +76,10 @@ impl<'caps, 'matches> Search<'caps, 'matches> {
         self.matches.iter().all(|m| *m)
     }
 
+    pub fn has_match(&mut self, match_slot: usize) -> bool {
+        self.matches.get(match_slot).map_or(false, |&b| b)
+    }
+
     pub fn set_match(&mut self, match_slot: usize) {
         self.matched_any = true;
         if let Some(old) = self.matches.get_mut(match_slot) {