summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2010-01-26 14:12:35 -0800
committerKaz Kylheku <kaz@kylheku.com>2010-01-26 14:12:35 -0800
commita0d9dcfe97e824e25174a3bfc9116e505eac0a51 (patch)
tree3b6bd544c75344f0bdb81f0c34226be77519c129
parent16a6ea72db8469f64f31903bea7b223f0072bca6 (diff)
downloadtxr-a0d9dcfe97e824e25174a3bfc9116e505eac0a51.tar.gz
txr-a0d9dcfe97e824e25174a3bfc9116e505eac0a51.tar.bz2
txr-a0d9dcfe97e824e25174a3bfc9116e505eac0a51.zip
Optimization in derivative-based regex engine.
Exponential memory consumption behavior was observed when matching the input aaaaaa.... against the regex a?a?a?a?....aaaa.... The fix is to eliminate common subexpressions from the derivative for the or operator.
-rw-r--r--ChangeLog18
-rw-r--r--lib.c17
-rw-r--r--lib.h2
-rw-r--r--regex.c55
4 files changed, 91 insertions, 1 deletions
diff --git a/ChangeLog b/ChangeLog
index 7868167c..c1a37dc3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,21 @@
+2010-01-26 Kaz Kylheku <kkylheku@gmail.com>
+
+ Optimization in derivative-based regex engine.
+ Exponential memory consumption behavior was observed when
+ matching the input aaaaaa....
+ against the regex a?a?a?a?....aaaa....
+ The fix is to eliminate common subexpressions
+ from the derivative for the or operator.
+
+ * lib.c (memqual, mapcon): New functions
+
+ * lib.h (memqual, mapcon): Declared.
+
+ * regex.c (flatten_or, unflatten_or,
+ unique_first, reduce_or): New functions.
+ (reg_derivative): Apply reduce_or
+ to the constructed disjunction.
+
2010-01-25 Kaz Kylheku <kkylheku@gmail.com>
Version 032
diff --git a/lib.c b/lib.c
index 45adedbd..30d7b661 100644
--- a/lib.c
+++ b/lib.c
@@ -364,6 +364,13 @@ val memq(val obj, val list)
return list;
}
+val memqual(val obj, val list)
+{
+ while (list && !equal(car(list), obj))
+ list = cdr(list);
+ return list;
+}
+
val tree_find(val obj, val tree)
{
if (equal(obj, tree))
@@ -1749,6 +1756,16 @@ val mapcar(val fun, val list)
return out;
}
+val mapcon(val fun, val list)
+{
+ list_collect_decl (out, iter);
+
+ for (; list; list = cdr(list))
+ list_collect_nconc (iter, funcall1(fun, list));
+
+ return out;
+}
+
val mappend(val fun, val list)
{
list_collect_decl (out, iter);
diff --git a/lib.h b/lib.h
index 85e21374..55f0ba6d 100644
--- a/lib.h
+++ b/lib.h
@@ -261,6 +261,7 @@ val append2(val list1, val list2);
val nappend2(val list1, val list2);
val flatten(val list);
val memq(val obj, val list);
+val memqual(val obj, val list);
val tree_find(val obj, val tree);
val some_satisfy(val list, val pred, val key);
val all_satisfy(val list, val pred, val key);
@@ -365,6 +366,7 @@ val alist_remove1(val list, val key);
val copy_cons(val cons);
val copy_alist(val list);
val mapcar(val fun, val list);
+val mapcon(val fun, val list);
val mappend(val fun, val list);
val merge(val list1, val list2, val lessfun, val keyfun);
val sort(val list, val lessfun, val keyfun);
diff --git a/regex.c b/regex.c
index 42968af2..0c81c262 100644
--- a/regex.c
+++ b/regex.c
@@ -1237,6 +1237,59 @@ static val reg_nullable(val exp)
}
}
+static val flatten_or(val or_expr)
+{
+ if (atom(or_expr) || car(or_expr) != or_s) {
+ return cons(or_expr, nil);
+ } else {
+ val left = second(or_expr);
+ val right = third(or_expr);
+ return nappend2(flatten_or(left), flatten_or(right));
+ }
+}
+
+static val unflatten_or(val exlist)
+{
+ val f = first(exlist);
+ val r = rest(exlist);
+
+ if (r) {
+ return cons(or_s, cons(f, cons(unflatten_or(r), nil)));
+ } else {
+ return f;
+ }
+}
+
+static val unique_first(val exlist)
+{
+ val f = first(exlist);
+ val r = rest(exlist);
+
+ if (!memqual(f, r))
+ return cons(first(exlist), nil);
+ return nil;
+}
+
+static val reduce_or(val or_expr)
+{
+ val left = second(or_expr);
+ val right = third(or_expr);
+
+ /*
+ * Do optimization only if this is an or of two or expressions.
+ */
+
+ if (consp(left) && first(left) == or_s &&
+ consp(right) && first(right) == or_s)
+ {
+ val exlist = flatten_or(or_expr);
+ val repeats_removed = mapcon(func_n1(unique_first), exlist);
+ return unflatten_or(repeats_removed);
+ } else {
+ return or_expr;
+ }
+}
+
static val reg_derivative(val, val);
static val reg_derivative_list(val exp_list, val ch)
@@ -1335,7 +1388,7 @@ static val reg_derivative(val exp, val ch)
if (d_arg2 == t)
return d_arg1;
- return cons(or_s, cons(d_arg1, cons(d_arg2, nil)));
+ return reduce_or(cons(or_s, cons(d_arg1, cons(d_arg2, nil))));
} else if (sym == and_s) {
val d_arg1 = reg_derivative(first(args), ch);
val d_arg2 = nil;