diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2017-09-11 19:38:29 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2017-09-11 19:38:29 -0700 |
commit | 6417918274d7d7d8d5b7bb675906f8e38d25c073 (patch) | |
tree | b0cae1edde45ba238214b8f64fcb335f5883feca /regex.c | |
parent | fd47bc4ae6bb9f6a4dab2cf12e8c5cb7459675e2 (diff) | |
download | txr-6417918274d7d7d8d5b7bb675906f8e38d25c073.tar.gz txr-6417918274d7d7d8d5b7bb675906f8e38d25c073.tar.bz2 txr-6417918274d7d7d8d5b7bb675906f8e38d25c073.zip |
regex: remove nfa_reject representation.
Reject states are no longer explicitly represented in the NFA
graph. Any transition that previously would have led to a
reject state is just a null pointer now: no transition.
This just means we have to check one place for a null pointer.
This change fixes a flaw in nfa_closure: the function was
propagating nfa_reject states to the closure set. When
the input set consisted only of reject states (or rather
exactly one reject state in that special case), it would
add it to the output set and return 1, making it seem as
if the regex is one that can potentially match something.
* regex.c (nfa_kind_t): Enum member nfa_reject removed.
(nfa_state_reject): Static function removed.
(nfa_compile_regex): Compile a t regex (match nothing)
to a NFA graph with no transition at all into any start
state; effectively an empty graph with no state nodes.
(nfa_map_states): Remove nfa_reject case from switch. Also,
stylistic change; state pointers are tested as Booleans
elsewhere rather than by comparison to null pointer.
(nfa_count_states, nfa_handle_wraparound): Handle null state
pointer.
(nfa_move_closure): Test the mov transition for null;
anything can potentially now have a null transition,
not only epsilon nodes.
(nfa_run, regex_machine_reset): Handle nfa.start being null
indicating an empty NFA graph. In that case we don't have to
calculate the closure; we just have an empty set of states and
set nfa.nclos to zero. The visited disambiguation counter is
irrelevant since there are no states to visit.
(regex_machine_feed): Don't try to propagate visited counter
stored in regex machine to empty NFA graph which has no states.
Diffstat (limited to 'regex.c')
-rw-r--r-- | regex.c | 76 |
1 files changed, 36 insertions, 40 deletions
@@ -190,7 +190,7 @@ typedef union char_set { } char_set_t; typedef enum { - nfa_accept, nfa_reject, nfa_empty, nfa_wild, nfa_single, nfa_set + nfa_accept, nfa_empty, nfa_wild, nfa_single, nfa_set } nfa_kind_t; struct nfa_state_accept { @@ -814,14 +814,6 @@ static nfa_state_t *nfa_state_accept(void) return st; } -static nfa_state_t *nfa_state_reject(void) -{ - nfa_state_t *st = coerce(nfa_state_t *, chk_malloc(sizeof *st)); - st->a.kind = nfa_reject; - st->a.visited = 0; - return st; -} - static nfa_state_t *nfa_state_empty(nfa_state_t *t0, nfa_state_t *t1) { nfa_state_t *st = coerce(nfa_state_t *, chk_malloc(sizeof *st)); @@ -1055,9 +1047,7 @@ static nfa_t nfa_compile_regex(val exp) uw_throwf(error_s, lit("bad operator in regex syntax: ~s"), sym, nao); } } else if (exp == t) { - nfa_state_t *acc = nfa_state_accept(); - nfa_state_t *s = nfa_state_reject(); - return nfa_make(s, acc); + return nfa_make(0, 0); } else { uw_throwf(error_s, lit("bad object in regex syntax: ~s"), exp, nao); } @@ -1068,13 +1058,12 @@ static void nfa_map_states(nfa_state_t *s, mem_t *ctx), unsigned visited) { - if (s != 0 && s->a.visited != visited) { + if (s && s->a.visited != visited) { s->a.visited = visited; fun(s, ctx); switch (s->a.kind) { case nfa_accept: - case nfa_reject: break; case nfa_empty: nfa_map_states(s->e.trans0, ctx, fun, visited); @@ -1099,14 +1088,16 @@ static void nfa_count_one(nfa_state_t *s, mem_t *ctx) static int nfa_count_states(nfa_state_t *s) { int count = 0; - unsigned visited = s->a.visited + 1; - nfa_map_states(s, coerce(mem_t *, &count), nfa_count_one, visited); + if (s) { + unsigned visited = s->a.visited + 1; + nfa_map_states(s, coerce(mem_t *, &count), nfa_count_one, visited); + } return count; } static void nfa_handle_wraparound(nfa_state_t *s, unsigned *pvisited) { - if (*pvisited == UINT_MAX) { + if (s && *pvisited == UINT_MAX) { s->a.visited = UINT_MAX - 1; (void) nfa_count_states(s); s->a.visited = UINT_MAX; @@ -1249,11 +1240,13 @@ static int nfa_move_closure(nfa_state_t **stack, nfa_state_t **set, int nin, bug_unless (stackp < nstates); - mov->a.visited = visited; - stack[stackp++] = mov; - set[nout++] = mov; - if (mov->a.kind == nfa_accept) - *accept = 1; + if (mov != 0) { + mov->a.visited = visited; + stack[stackp++] = mov; + set[nout++] = mov; + if (mov->a.kind == nfa_accept) + *accept = 1; + } } } @@ -1314,18 +1307,18 @@ static int nfa_move_closure(nfa_state_t **stack, nfa_state_t **set, int nin, static cnum nfa_run(nfa_t nfa, int nstates, const wchar_t *str) { const wchar_t *last_accept_pos = 0, *ptr = str; - unsigned visited = nfa.start->a.visited + 1; + unsigned visited = nfa.start ? (nfa.start->a.visited + 1) : 0; nfa_state_t **set = coerce(nfa_state_t **, alloca(nstates * sizeof *set)); nfa_state_t **stack = coerce(nfa_state_t **, alloca(nstates * sizeof *stack)); - int nclos; + int nclos = 0; int accept = 0; nfa_handle_wraparound(nfa.start, &visited); - set[0] = nfa.start; - - nclos = nfa_closure(stack, set, 1, - nstates, visited++, &accept); + if (nfa.start) { + set[0] = nfa.start; + nclos = nfa_closure(stack, set, 1, nstates, visited++, &accept); + } if (accept) last_accept_pos = ptr; @@ -1347,7 +1340,8 @@ static cnum nfa_run(nfa_t nfa, int nstates, const wchar_t *str) break; } - nfa.start->a.visited = visited; + if (nfa.start) + nfa.start->a.visited = visited; return last_accept_pos ? last_accept_pos - str : -1; } @@ -2351,16 +2345,17 @@ static void regex_machine_reset(regex_machine_t *regm) if (regm->n.is_nfa) { nfa_state_t *s = regm->n.nfa.start; - regm->n.visited = regm->n.nfa.start->a.visited + 1; - nfa_handle_wraparound(s, ®m->n.visited); - - regm->n.set[0] = regm->n.nfa.start; - - regm->n.nclos = nfa_closure(regm->n.stack, regm->n.set, 1, - regm->n.nstates, - regm->n.visited++, &accept); - - regm->n.nfa.start->a.visited = regm->n.visited; + if (s) { + regm->n.visited = s->a.visited + 1; + nfa_handle_wraparound(s, ®m->n.visited); + regm->n.set[0] = s; + regm->n.nclos = nfa_closure(regm->n.stack, regm->n.set, 1, + regm->n.nstates, + regm->n.visited++, &accept); + s->a.visited = regm->n.visited; + } else { + regm->n.nclos = 0; + } } else { regm->d.deriv = regm->d.regex; accept = (reg_nullable(regm->d.regex) != nil); @@ -2418,7 +2413,8 @@ static regm_result_t regex_machine_feed(regex_machine_t *regm, wchar_t ch) regm->n.nstates, ch, regm->n.visited++, &accept); - regm->n.nfa.start->a.visited = regm->n.visited; + if (regm->n.nfa.start) + regm->n.nfa.start->a.visited = regm->n.visited; if (accept) { regm->n.last_accept_pos = regm->n.count; |