summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog18
-rw-r--r--lib.c17
-rw-r--r--lib.h2
-rw-r--r--regex.c55
4 files changed, 91 insertions, 1 deletions
diff --git a/ChangeLog b/ChangeLog
index 7868167c..c1a37dc3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,21 @@
+2010-01-26 Kaz Kylheku <kkylheku@gmail.com>
+
+ Optimization in derivative-based regex engine.
+ Exponential memory consumption behavior was observed when
+ matching the input aaaaaa....
+ against the regex a?a?a?a?....aaaa....
+ The fix is to eliminate common subexpressions
+ from the derivative for the or operator.
+
+ * lib.c (memqual, mapcon): New functions
+
+ * lib.h (memqual, mapcon): Declared.
+
+ * regex.c (flatten_or, unflatten_or,
+ unique_first, reduce_or): New functions.
+ (reg_derivative): Apply reduce_or
+ to the constructed disjunction.
+
2010-01-25 Kaz Kylheku <kkylheku@gmail.com>
Version 032
diff --git a/lib.c b/lib.c
index 45adedbd..30d7b661 100644
--- a/lib.c
+++ b/lib.c
@@ -364,6 +364,13 @@ val memq(val obj, val list)
return list;
}
+val memqual(val obj, val list)
+{
+ while (list && !equal(car(list), obj))
+ list = cdr(list);
+ return list;
+}
+
val tree_find(val obj, val tree)
{
if (equal(obj, tree))
@@ -1749,6 +1756,16 @@ val mapcar(val fun, val list)
return out;
}
+val mapcon(val fun, val list)
+{
+ list_collect_decl (out, iter);
+
+ for (; list; list = cdr(list))
+ list_collect_nconc (iter, funcall1(fun, list));
+
+ return out;
+}
+
val mappend(val fun, val list)
{
list_collect_decl (out, iter);
diff --git a/lib.h b/lib.h
index 85e21374..55f0ba6d 100644
--- a/lib.h
+++ b/lib.h
@@ -261,6 +261,7 @@ val append2(val list1, val list2);
val nappend2(val list1, val list2);
val flatten(val list);
val memq(val obj, val list);
+val memqual(val obj, val list);
val tree_find(val obj, val tree);
val some_satisfy(val list, val pred, val key);
val all_satisfy(val list, val pred, val key);
@@ -365,6 +366,7 @@ val alist_remove1(val list, val key);
val copy_cons(val cons);
val copy_alist(val list);
val mapcar(val fun, val list);
+val mapcon(val fun, val list);
val mappend(val fun, val list);
val merge(val list1, val list2, val lessfun, val keyfun);
val sort(val list, val lessfun, val keyfun);
diff --git a/regex.c b/regex.c
index 42968af2..0c81c262 100644
--- a/regex.c
+++ b/regex.c
@@ -1237,6 +1237,59 @@ static val reg_nullable(val exp)
}
}
+static val flatten_or(val or_expr)
+{
+ if (atom(or_expr) || car(or_expr) != or_s) {
+ return cons(or_expr, nil);
+ } else {
+ val left = second(or_expr);
+ val right = third(or_expr);
+ return nappend2(flatten_or(left), flatten_or(right));
+ }
+}
+
+static val unflatten_or(val exlist)
+{
+ val f = first(exlist);
+ val r = rest(exlist);
+
+ if (r) {
+ return cons(or_s, cons(f, cons(unflatten_or(r), nil)));
+ } else {
+ return f;
+ }
+}
+
+static val unique_first(val exlist)
+{
+ val f = first(exlist);
+ val r = rest(exlist);
+
+ if (!memqual(f, r))
+ return cons(first(exlist), nil);
+ return nil;
+}
+
+static val reduce_or(val or_expr)
+{
+ val left = second(or_expr);
+ val right = third(or_expr);
+
+ /*
+ * Do optimization only if this is an or of two or expressions.
+ */
+
+ if (consp(left) && first(left) == or_s &&
+ consp(right) && first(right) == or_s)
+ {
+ val exlist = flatten_or(or_expr);
+ val repeats_removed = mapcon(func_n1(unique_first), exlist);
+ return unflatten_or(repeats_removed);
+ } else {
+ return or_expr;
+ }
+}
+
static val reg_derivative(val, val);
static val reg_derivative_list(val exp_list, val ch)
@@ -1335,7 +1388,7 @@ static val reg_derivative(val exp, val ch)
if (d_arg2 == t)
return d_arg1;
- return cons(or_s, cons(d_arg1, cons(d_arg2, nil)));
+ return reduce_or(cons(or_s, cons(d_arg1, cons(d_arg2, nil))));
} else if (sym == and_s) {
val d_arg1 = reg_derivative(first(args), ch);
val d_arg2 = nil;