summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 17
-rw-r--r-- regex.c 32
-rw-r--r-- txr.c 4
3 files changed, 51 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index 8f34644a..1a134673 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,22 @@
2010-01-13 Kaz Kylheku <kkylheku@gmail.com>
+ Dynamically determine which regex implementation to use:
+ NFA or derivatives. The default behavior is NFA, with
+ derivatives used if the regular expression contains
+ uses of complement or intersection. The --dv-regex
+ option forces derivatives always.
+
+ * regex.c (opt_derivative_regex): Default value is 0 now.
+ (regex_requires_dv): New function.
+ (regex_compile): If regex_requires_dv function reports
+ true, or if the opt_derivative_regex flag is true,
+ treat the regex with the derivative-based implementation.
+
+ * txr.c (txr_main): Implemented --dv-regex option
+ to set the opt_derivative_regex flag.
+
+2010-01-13 Kaz Kylheku <kkylheku@gmail.com>
+
* lib.h (c_num): Remove redundant declaration.
2010-01-13 Kaz Kylheku <kkylheku@gmail.com>
diff --git a/regex.c b/regex.c
index 147f03cb..a45339d5 100644
--- a/regex.c
+++ b/regex.c
@@ -183,7 +183,7 @@ union regex_machine {
struct dv_machine d;
};
-int opt_derivative_regex = 1;
+int opt_derivative_regex = 0;
static int L0_full(cset_L0_t *L0)
{
@@ -1336,9 +1336,37 @@ static cnum dv_run(val regex, const wchar_t *str)
return last_accept_pos ? last_accept_pos - str : -1;
}
+static val regex_requires_dv(val exp) /* Reports (t or nil) whether the regex tree exp must be handled by the derivative-based implementation. */
+{
+ if (atom(exp)) { /* an atom carries no operator, so it never forces derivatives */
+ return nil;
+ } else {
+ val sym = first(exp); /* operator symbol heading this node */
+ val args = rest(exp); /* the operands that follow the operator */
+
+ if (sym == set_s || sym == cset_s) { /* set/cset nodes: answer is nil without examining the operands */
+ return nil;
+ } else if (sym == compound_s) { /* test every operand; presumably some_satisfy is true if any one qualifies -- confirm */
+ return some_satisfy(args, func_n1(regex_requires_dv), nil);
+ } else if (sym == zeroplus_s || sym == oneplus_s ||
+ sym == optional_s) { /* these operators are decided solely by their first operand */
+ return regex_requires_dv(first(args));
+ } else if (sym == compl_s) { /* compl_s always selects derivatives */
+ return t;
+ } else if (sym == or_s) { /* derivatives needed if either of the first two operands needs them */
+ return if2(regex_requires_dv(first(args)) ||
+ regex_requires_dv(second(args)), t); /* presumably if2 yields t when the condition holds, nil otherwise -- confirm */
+ } else if (sym == and_s) { /* and_s (presumably intersection) always selects derivatives */
+ return t;
+ } else {
+ internal_error("bad operator in regex"); /* NOTE(review): no return follows; internal_error presumably does not return -- confirm */
+ }
+ }
+}
+
val regex_compile(val regex_sexp)
{
- if (opt_derivative_regex) {
+ if (opt_derivative_regex || regex_requires_dv(regex_sexp)) {
return cons(compiled_regex_s, cons(dv_compile_regex(regex_sexp), nil));
} else {
nfa_t *pnfa = (nfa_t *) chk_malloc(sizeof *pnfa);
diff --git a/txr.c b/txr.c
index 16faa19a..ff1397f7 100644
--- a/txr.c
+++ b/txr.c
@@ -281,6 +281,10 @@ int txr_main(int argc, char **argv)
prog_string, string_utf8(*argv), nao);
return EXIT_FAILURE;
#endif
+ } else if (!strcmp(*argv, "--dv-regex")) {
+ opt_derivative_regex = 1;
+ argv++, argc--;
+ continue;
}
{