summaryrefslogtreecommitdiffstats
path: root/regex.c
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2017-09-12 20:15:42 -0700
committerKaz Kylheku <kaz@kylheku.com>2017-09-12 20:15:42 -0700
commita05a064e31cc0da13d8eee2e7705f3ac7d08c46a (patch)
tree75748fe0248a0bbbdd8b712e93bd3571c1fc999d /regex.c
parent1d1a1c61329de3fc0400306d13de0377ff5e316c (diff)
downloadtxr-a05a064e31cc0da13d8eee2e7705f3ac7d08c46a.tar.gz
txr-a05a064e31cc0da13d8eee2e7705f3ac7d08c46a.tar.bz2
txr-a05a064e31cc0da13d8eee2e7705f3ac7d08c46a.zip
regex: accept-folding optimization.
In this optimization, we identify places in the NFA graph where empty states transition to accept states. We eliminate these transitions and turn the empty states into accept states. Accept states which thereby become unreachable from the start state are pruned away. * regex.c (nfa_fold_accept): New static function. (nfa_optimize): Add a pass which applies nfa_fold_accept to all states.
Diffstat (limited to 'regex.c')
-rw-r--r--regex.c23
1 files changed, 23 insertions, 0 deletions
diff --git a/regex.c b/regex.c
index 009bf7e0..aa57a326 100644
--- a/regex.c
+++ b/regex.c
@@ -1176,6 +1176,26 @@ static void nfa_thread_epsilons(nfa_state_t **ps, unsigned visited)
}
}
+static void nfa_fold_accept(nfa_state_t *s, mem_t *ctx)
+{
+ (void) ctx;
+
+ if (s->a.kind == nfa_empty) {
+ nfa_state_t *e0 = s->e.trans0;
+ nfa_state_t *e1 = s->e.trans1;
+
+ if (e0 && nfa_accept_state_p(e0)) {
+ s->e.accept = 1;
+ s->e.trans0 = 0;
+ }
+
+ if (e1 && nfa_accept_state_p(e1)) {
+ s->e.accept = 1;
+ s->e.trans1 = 0;
+ }
+ }
+}
+
static void nfa_noop(nfa_state_t *s, mem_t *ctx)
{
(void) s;
@@ -1196,6 +1216,9 @@ static nfa_t nfa_optimize(nfa_t nfa)
/* Eliminate useless epsilons from graph. */
nfa_thread_epsilons(&nfa.start, nfa.start->a.visited + 1);
+ /* Fold accept states into empty transitions which reference them. */
+ nfa_map_states(nfa.start, 0, nfa_fold_accept, nfa.start->a.visited + 1);
+
/* Visit all reachable states. */
nfa_map_states(nfa.start, 0, nfa_noop, nfa.start->a.visited + 1);