diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2015-09-15 06:58:43 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2015-09-15 06:58:43 -0700 |
commit | 221c736a969f7305ffabc9a37b968879aed0affc (patch) | |
tree | 0a42329f3b82fb8879bfdd5a946ab52b09fba34a /regex.c | |
parent | 815f1dcda70f161b09813fc4fddc24c522ea71bf (diff) | |
download | txr-221c736a969f7305ffabc9a37b968879aed0affc.tar.gz txr-221c736a969f7305ffabc9a37b968879aed0affc.tar.bz2 txr-221c736a969f7305ffabc9a37b968879aed0affc.zip |
Regex state-marking counter wraparound bug.
When a NFA regex goes through more than 4.29 billion state
transitions, the state coloring "visited" marker wraps around.
There could still exist states with old values at or near
zero, which destroys the correctness of the closure
calculations.
* regex.c (nfa_handle_wraparound): New static function.
The wraparound situation is handled by detecting when
the next marker value is UINT_MAX. When this happens,
we visit all states, marking them to UINT_MAX.
Then we visit them again, marking them to zero, and
set the next marker value to 1.
(nfa_free): Added comment about why we don't have a
wraparound check, in case it isn't obvious.
(nfa_run): Check for wraparound before eveyr nfa_closure call.
(regex_machine_reset): Check for wraparound before nfa_closure
call. Fix: store the counter back in the start state's visited
field.
(regex_machine_init): Initialize the n.visited field of the
regex machine structure to zero. Not strictly necessary, since
it's initialized moments later in regex_machine_reset, but
good form.
(regex_machine_feed): Check for wraparound before nfa_closure
call.
Diffstat (limited to 'regex.c')
-rw-r--r-- | regex.c | 29 |
1 files changed, 28 insertions, 1 deletions
@@ -1089,6 +1089,17 @@ static int nfa_count_states(nfa_state_t *s) return count; } +static void nfa_handle_wraparound(nfa_state_t *s, unsigned *pvisited) +{ + if (*pvisited == UINT_MAX) { + s->a.visited = UINT_MAX - 1; + (void) nfa_count_states(s); + s->a.visited = UINT_MAX; + (void) nfa_count_states(s); + *pvisited = 1; + } +} + static void nfa_collect_one(nfa_state_t *s, mem_t *ctx) { nfa_state_t ***ppel = coerce(nfa_state_t ***, ctx); @@ -1102,6 +1113,9 @@ static void nfa_free(nfa_t nfa, int nstates) unsigned visited = s->a.visited + 1; int i; + /* We don't care if visited has reached UINT_MAX here, because the regex is + * going away, so we don't bother with nfa_handle_wraparound. + */ nfa_map_states(s, coerce(mem_t *, &pelem), nfa_collect_one, visited); assert (pelem - all == nstates); @@ -1247,6 +1261,8 @@ static cnum nfa_run(nfa_t nfa, int nstates, const wchar_t *str) int nmove = 1, nclos; int accept = 0; + nfa_handle_wraparound(nfa.start, &visited); + move[0] = nfa.start; nclos = nfa_closure(stack, move, nmove, clos, @@ -1260,6 +1276,8 @@ static cnum nfa_run(nfa_t nfa, int nstates, const wchar_t *str) accept = 0; + nfa_handle_wraparound(nfa.start, &visited); + nmove = nfa_move(clos, nclos, move, ch); nclos = nfa_closure(stack, move, nmove, clos, nstates, visited++, &accept); @@ -1860,14 +1878,20 @@ static void regex_machine_reset(regex_machine_t *regm) regm->n.count = 0; if (regm->n.is_nfa) { - regm->n.visited = regm->n.nfa.start->a.visited + 1; + nfa_state_t *s = regm->n.nfa.start; + regm->n.nmove = 1; + regm->n.visited = regm->n.nfa.start->a.visited + 1; + nfa_handle_wraparound(s, ®m->n.visited); + regm->n.move[0] = regm->n.nfa.start; regm->n.nclos = nfa_closure(regm->n.stack, regm->n.move, regm->n.nmove, regm->n.clos, regm->n.nstates, regm->n.visited++, &accept); + + regm->n.nfa.start->a.visited = regm->n.visited; } else { regm->d.deriv = regm->d.regex; accept = (reg_nullable(regm->d.regex) != nil); @@ -1888,6 +1912,7 @@ static void regex_machine_init(regex_machine_t *regm, val reg) regm->n.is_nfa = 1; regm->n.nfa = regex->r.nfa; regm->n.nstates = regex->nstates; + regm->n.visited = 0; regm->n.move = coerce(nfa_state_t **, chk_malloc(regex->nstates * sizeof *regm->n.move)); regm->n.clos = coerce(nfa_state_t **, @@ -1918,6 +1943,8 @@ static regm_result_t regex_machine_feed(regex_machine_t *regm, wchar_t ch) int accept = 0; if (regm->n.is_nfa) { + nfa_handle_wraparound(regm->n.nfa.start, ®m->n.visited); + if (ch != 0) { regm->n.count++; |