diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2010-01-13 15:25:11 -0800 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2010-01-13 15:25:11 -0800 |
commit | 56cfdc348955495dbd11fc70fb7ac04a8bba1f71 (patch) | |
tree | ee7f0d7c44782243800dafc7605d588216d46755 /regex.c | |
parent | 5c111d0e415db7bc0bfa236b08ae6d620e898e92 (diff) | |
download | txr-56cfdc348955495dbd11fc70fb7ac04a8bba1f71.tar.gz txr-56cfdc348955495dbd11fc70fb7ac04a8bba1f71.tar.bz2 txr-56cfdc348955495dbd11fc70fb7ac04a8bba1f71.zip |
Dynamically determine which regex implementation to use:
NFA or derivatives. The default behavior is NFA, with
derivatives used if the regular expression contains
uses of complement or intersection. The --dv-regex
option forces derivatives always.
Diffstat (limited to 'regex.c')
-rw-r--r-- | regex.c | 32 |
1 file changed, 30 insertions, 2 deletions
```diff
@@ -183,7 +183,7 @@ union regex_machine {
   struct dv_machine d;
 };
 
-int opt_derivative_regex = 1;
+int opt_derivative_regex = 0;
 
 static int L0_full(cset_L0_t *L0)
 {
@@ -1336,9 +1336,37 @@ static cnum dv_run(val regex, const wchar_t *str)
   return last_accept_pos ? last_accept_pos - str : -1;
 }
 
+static val regex_requires_dv(val exp)
+{
+  if (atom(exp)) {
+    return nil;
+  } else {
+    val sym = first(exp);
+    val args = rest(exp);
+
+    if (sym == set_s || sym == cset_s) {
+      return nil;
+    } else if (sym == compound_s) {
+      return some_satisfy(args, func_n1(regex_requires_dv), nil);
+    } else if (sym == zeroplus_s || sym == oneplus_s ||
+               sym == optional_s) {
+      return regex_requires_dv(first(args));
+    } else if (sym == compl_s) {
+      return t;
+    } else if (sym == or_s) {
+      return if2(regex_requires_dv(first(args)) ||
+                 regex_requires_dv(second(args)), t);
+    } else if (sym == and_s) {
+      return t;
+    } else {
+      internal_error("bad operator in regex");
+    }
+  }
+}
+
 val regex_compile(val regex_sexp)
 {
-  if (opt_derivative_regex) {
+  if (opt_derivative_regex || regex_requires_dv(regex_sexp)) {
     return cons(compiled_regex_s, cons(dv_compile_regex(regex_sexp), nil));
   } else {
     nfa_t *pnfa = (nfa_t *) chk_malloc(sizeof *pnfa);
```