diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2010-01-26 14:12:35 -0800 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2010-01-26 14:12:35 -0800 |
commit | a0d9dcfe97e824e25174a3bfc9116e505eac0a51 (patch) | |
tree | 3b6bd544c75344f0bdb81f0c34226be77519c129 | |
parent | 16a6ea72db8469f64f31903bea7b223f0072bca6 (diff) | |
download | txr-a0d9dcfe97e824e25174a3bfc9116e505eac0a51.tar.gz txr-a0d9dcfe97e824e25174a3bfc9116e505eac0a51.tar.bz2 txr-a0d9dcfe97e824e25174a3bfc9116e505eac0a51.zip |
Optimization in derivative-based regex engine.
Exponential memory consumption behavior was observed when
matching the input aaaaaa....
against the regex a?a?a?a?....aaaa....
The fix is to eliminate common subexpressions
from the derivative for the or operator.
-rw-r--r-- | ChangeLog | 18 | ||||
-rw-r--r-- | lib.c | 17 | ||||
-rw-r--r-- | lib.h | 2 | ||||
-rw-r--r-- | regex.c | 55 |
4 files changed, 91 insertions, 1 deletions
@@ -1,3 +1,21 @@ +2010-01-26 Kaz Kylheku <kkylheku@gmail.com> + + Optimization in derivative-based regex engine. + Exponential memory consumption behavior was observed when + matching the input aaaaaa.... + against the regex a?a?a?a?....aaaa.... + The fix is to eliminate common subexpressions + from the derivative for the or operator. + + * lib.c (memqual, mapcon): New functions + + * lib.h (memqual, mapcon): Declared. + + * regex.c (flatten_or, unflatten_or, + unique_first, reduce_or): New functions. + (reg_derivative): Apply reduce_or + to the constructed disjunction. + 2010-01-25 Kaz Kylheku <kkylheku@gmail.com> Version 032 @@ -364,6 +364,13 @@ val memq(val obj, val list) return list; } +val memqual(val obj, val list) +{ + while (list && !equal(car(list), obj)) + list = cdr(list); + return list; +} + val tree_find(val obj, val tree) { if (equal(obj, tree)) @@ -1749,6 +1756,16 @@ val mapcar(val fun, val list) return out; } +val mapcon(val fun, val list) +{ + list_collect_decl (out, iter); + + for (; list; list = cdr(list)) + list_collect_nconc (iter, funcall1(fun, list)); + + return out; +} + val mappend(val fun, val list) { list_collect_decl (out, iter); @@ -261,6 +261,7 @@ val append2(val list1, val list2); val nappend2(val list1, val list2); val flatten(val list); val memq(val obj, val list); +val memqual(val obj, val list); val tree_find(val obj, val tree); val some_satisfy(val list, val pred, val key); val all_satisfy(val list, val pred, val key); @@ -365,6 +366,7 @@ val alist_remove1(val list, val key); val copy_cons(val cons); val copy_alist(val list); val mapcar(val fun, val list); +val mapcon(val fun, val list); val mappend(val fun, val list); val merge(val list1, val list2, val lessfun, val keyfun); val sort(val list, val lessfun, val keyfun); @@ -1237,6 +1237,59 @@ static val reg_nullable(val exp) } } +static val flatten_or(val or_expr) +{ + if (atom(or_expr) || car(or_expr) != or_s) { + return cons(or_expr, nil); + } else { + val left = second(or_expr); + val right = third(or_expr); + return nappend2(flatten_or(left), flatten_or(right)); + } +} + +static val unflatten_or(val exlist) +{ + val f = first(exlist); + val r = rest(exlist); + + if (r) { + return cons(or_s, cons(f, cons(unflatten_or(r), nil))); + } else { + return f; + } +} + +static val unique_first(val exlist) +{ + val f = first(exlist); + val r = rest(exlist); + + if (!memqual(f, r)) + return cons(first(exlist), nil); + return nil; +} + +static val reduce_or(val or_expr) +{ + val left = second(or_expr); + val right = third(or_expr); + + /* + * Do optimization only if this is an or of two or expressions. + */ + + if (consp(left) && first(left) == or_s && + consp(right) && first(right) == or_s) + { + val exlist = flatten_or(or_expr); + val repeats_removed = mapcon(func_n1(unique_first), exlist); + return unflatten_or(repeats_removed); + } else { + return or_expr; + } +} + static val reg_derivative(val, val); static val reg_derivative_list(val exp_list, val ch) @@ -1335,7 +1388,7 @@ static val reg_derivative(val exp, val ch) if (d_arg2 == t) return d_arg1; - return cons(or_s, cons(d_arg1, cons(d_arg2, nil))); + return reduce_or(cons(or_s, cons(d_arg1, cons(d_arg2, nil)))); } else if (sym == and_s) { val d_arg1 = reg_derivative(first(args), ch); val d_arg2 = nil; |