summaryrefslogtreecommitdiffstats
path: root/regex.c
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2015-09-15 06:58:43 -0700
committerKaz Kylheku <kaz@kylheku.com>2015-09-15 06:58:43 -0700
commit221c736a969f7305ffabc9a37b968879aed0affc (patch)
tree0a42329f3b82fb8879bfdd5a946ab52b09fba34a /regex.c
parent815f1dcda70f161b09813fc4fddc24c522ea71bf (diff)
downloadtxr-221c736a969f7305ffabc9a37b968879aed0affc.tar.gz
txr-221c736a969f7305ffabc9a37b968879aed0affc.tar.bz2
txr-221c736a969f7305ffabc9a37b968879aed0affc.zip
Regex state-marking counter wraparound bug.
When a NFA regex goes through more than 4.29 billion state transitions, the state coloring "visited" marker wraps around. There could still exist states with old values at or near zero, which destroys the correctness of the closure calculations. * regex.c (nfa_handle_wraparound): New static function. The wraparound situation is handled by detecting when the next marker value is UINT_MAX. When this happens, we visit all states, marking them to UINT_MAX. Then we visit them again, marking them to zero, and set the next marker value to 1. (nfa_free): Added comment about why we don't have a wraparound check, in case it isn't obvious. (nfa_run): Check for wraparound before eveyr nfa_closure call. (regex_machine_reset): Check for wraparound before nfa_closure call. Fix: store the counter back in the start state's visited field. (regex_machine_init): Initialize the n.visited field of the regex machine structure to zero. Not strictly necessary, since it's initialized moments later in regex_machine_reset, but good form. (regex_machine_feed): Check for wraparound before nfa_closure call.
Diffstat (limited to 'regex.c')
-rw-r--r--regex.c29
1 files changed, 28 insertions, 1 deletions
diff --git a/regex.c b/regex.c
index 65f14843..9cfd60d8 100644
--- a/regex.c
+++ b/regex.c
@@ -1089,6 +1089,17 @@ static int nfa_count_states(nfa_state_t *s)
return count;
}
+static void nfa_handle_wraparound(nfa_state_t *s, unsigned *pvisited)
+{
+ if (*pvisited == UINT_MAX) {
+ s->a.visited = UINT_MAX - 1;
+ (void) nfa_count_states(s);
+ s->a.visited = UINT_MAX;
+ (void) nfa_count_states(s);
+ *pvisited = 1;
+ }
+}
+
static void nfa_collect_one(nfa_state_t *s, mem_t *ctx)
{
nfa_state_t ***ppel = coerce(nfa_state_t ***, ctx);
@@ -1102,6 +1113,9 @@ static void nfa_free(nfa_t nfa, int nstates)
unsigned visited = s->a.visited + 1;
int i;
+ /* We don't care if visited has reached UINT_MAX here, because the regex is
+ * going away, so we don't bother with nfa_handle_wraparound.
+ */
nfa_map_states(s, coerce(mem_t *, &pelem), nfa_collect_one, visited);
assert (pelem - all == nstates);
@@ -1247,6 +1261,8 @@ static cnum nfa_run(nfa_t nfa, int nstates, const wchar_t *str)
int nmove = 1, nclos;
int accept = 0;
+ nfa_handle_wraparound(nfa.start, &visited);
+
move[0] = nfa.start;
nclos = nfa_closure(stack, move, nmove, clos,
@@ -1260,6 +1276,8 @@ static cnum nfa_run(nfa_t nfa, int nstates, const wchar_t *str)
accept = 0;
+ nfa_handle_wraparound(nfa.start, &visited);
+
nmove = nfa_move(clos, nclos, move, ch);
nclos = nfa_closure(stack, move, nmove, clos,
nstates, visited++, &accept);
@@ -1860,14 +1878,20 @@ static void regex_machine_reset(regex_machine_t *regm)
regm->n.count = 0;
if (regm->n.is_nfa) {
- regm->n.visited = regm->n.nfa.start->a.visited + 1;
+ nfa_state_t *s = regm->n.nfa.start;
+
regm->n.nmove = 1;
+ regm->n.visited = regm->n.nfa.start->a.visited + 1;
+ nfa_handle_wraparound(s, &regm->n.visited);
+
regm->n.move[0] = regm->n.nfa.start;
regm->n.nclos = nfa_closure(regm->n.stack, regm->n.move, regm->n.nmove,
regm->n.clos, regm->n.nstates,
regm->n.visited++, &accept);
+
+ regm->n.nfa.start->a.visited = regm->n.visited;
} else {
regm->d.deriv = regm->d.regex;
accept = (reg_nullable(regm->d.regex) != nil);
@@ -1888,6 +1912,7 @@ static void regex_machine_init(regex_machine_t *regm, val reg)
regm->n.is_nfa = 1;
regm->n.nfa = regex->r.nfa;
regm->n.nstates = regex->nstates;
+ regm->n.visited = 0;
regm->n.move = coerce(nfa_state_t **,
chk_malloc(regex->nstates * sizeof *regm->n.move));
regm->n.clos = coerce(nfa_state_t **,
@@ -1918,6 +1943,8 @@ static regm_result_t regex_machine_feed(regex_machine_t *regm, wchar_t ch)
int accept = 0;
if (regm->n.is_nfa) {
+ nfa_handle_wraparound(regm->n.nfa.start, &regm->n.visited);
+
if (ch != 0) {
regm->n.count++;