summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2017-09-12 20:15:42 -0700
committerKaz Kylheku <kaz@kylheku.com>2017-09-12 20:15:42 -0700
commita05a064e31cc0da13d8eee2e7705f3ac7d08c46a (patch)
tree75748fe0248a0bbbdd8b712e93bd3571c1fc999d
parent1d1a1c61329de3fc0400306d13de0377ff5e316c (diff)
downloadtxr-a05a064e31cc0da13d8eee2e7705f3ac7d08c46a.tar.gz
txr-a05a064e31cc0da13d8eee2e7705f3ac7d08c46a.tar.bz2
txr-a05a064e31cc0da13d8eee2e7705f3ac7d08c46a.zip
regex: accept-folding optimization.
In this optimization, we identify places in the NFA graph where empty states transition to accept states. We eliminate these transitions and turn the empty states into accept states. Accept states which thereby become unreachable from the start state are pruned away. * regex.c (nfa_fold_accept): New static function. (nfa_optimize): Add a pass which applies nfa_fold_accept to all states.
-rw-r--r--regex.c23
1 files changed, 23 insertions, 0 deletions
diff --git a/regex.c b/regex.c
index 009bf7e0..aa57a326 100644
--- a/regex.c
+++ b/regex.c
@@ -1176,6 +1176,26 @@ static void nfa_thread_epsilons(nfa_state_t **ps, unsigned visited)
}
}
+static void nfa_fold_accept(nfa_state_t *s, mem_t *ctx)
+{
+ (void) ctx;
+
+ if (s->a.kind == nfa_empty) {
+ nfa_state_t *e0 = s->e.trans0;
+ nfa_state_t *e1 = s->e.trans1;
+
+ if (e0 && nfa_accept_state_p(e0)) {
+ s->e.accept = 1;
+ s->e.trans0 = 0;
+ }
+
+ if (e1 && nfa_accept_state_p(e1)) {
+ s->e.accept = 1;
+ s->e.trans1 = 0;
+ }
+ }
+}
+
static void nfa_noop(nfa_state_t *s, mem_t *ctx)
{
(void) s;
@@ -1196,6 +1216,9 @@ static nfa_t nfa_optimize(nfa_t nfa)
/* Eliminate useless epsilons from graph. */
nfa_thread_epsilons(&nfa.start, nfa.start->a.visited + 1);
+ /* Fold accept states into empty transitions which reference them. */
+ nfa_map_states(nfa.start, 0, nfa_fold_accept, nfa.start->a.visited + 1);
+
/* Visit all reachable states. */
nfa_map_states(nfa.start, 0, nfa_noop, nfa.start->a.visited + 1);