diff options
-rw-r--r-- | ChangeLog | 18 | ||||
-rw-r--r-- | lib.c | 17 | ||||
-rw-r--r-- | lib.h | 2 | ||||
-rw-r--r-- | regex.c | 55 |
4 files changed, 91 insertions, 1 deletions
@@ -1,3 +1,21 @@ +2010-01-26 Kaz Kylheku <kkylheku@gmail.com> + + Optimization in derivative-based regex engine. + Exponential memory consumption behavior was observed when + matching the input aaaaaa.... + against the regex a?a?a?a?....aaaa.... + The fix is to eliminate common subexpressions + from the derivative for the or operator. + + * lib.c (memqual, mapcon): New functions + + * lib.h (memqual, mapcon): Declared. + + * regex.c (flatten_or, unflatten_or, + unique_first, reduce_or): New functions. + (reg_derivative): Apply reduce_or + to the constructed disjunction. + 2010-01-25 Kaz Kylheku <kkylheku@gmail.com> Version 032 @@ -364,6 +364,13 @@ val memq(val obj, val list) return list; } +val memqual(val obj, val list) +{ + while (list && !equal(car(list), obj)) + list = cdr(list); + return list; +} + val tree_find(val obj, val tree) { if (equal(obj, tree)) @@ -1749,6 +1756,16 @@ val mapcar(val fun, val list) return out; } +val mapcon(val fun, val list) +{ + list_collect_decl (out, iter); + + for (; list; list = cdr(list)) + list_collect_nconc (iter, funcall1(fun, list)); + + return out; +} + val mappend(val fun, val list) { list_collect_decl (out, iter); @@ -261,6 +261,7 @@ val append2(val list1, val list2); val nappend2(val list1, val list2); val flatten(val list); val memq(val obj, val list); +val memqual(val obj, val list); val tree_find(val obj, val tree); val some_satisfy(val list, val pred, val key); val all_satisfy(val list, val pred, val key); @@ -365,6 +366,7 @@ val alist_remove1(val list, val key); val copy_cons(val cons); val copy_alist(val list); val mapcar(val fun, val list); +val mapcon(val fun, val list); val mappend(val fun, val list); val merge(val list1, val list2, val lessfun, val keyfun); val sort(val list, val lessfun, val keyfun); @@ -1237,6 +1237,59 @@ static val reg_nullable(val exp) } } +static val flatten_or(val or_expr) +{ + if (atom(or_expr) || car(or_expr) != or_s) { + return cons(or_expr, nil); + } else { + val left = second(or_expr); + val right = third(or_expr); + return nappend2(flatten_or(left), flatten_or(right)); + } +} + +static val unflatten_or(val exlist) +{ + val f = first(exlist); + val r = rest(exlist); + + if (r) { + return cons(or_s, cons(f, cons(unflatten_or(r), nil))); + } else { + return f; + } +} + +static val unique_first(val exlist) +{ + val f = first(exlist); + val r = rest(exlist); + + if (!memqual(f, r)) + return cons(first(exlist), nil); + return nil; +} + +static val reduce_or(val or_expr) +{ + val left = second(or_expr); + val right = third(or_expr); + + /* + * Do optimization only if this is an or of two or expressions. + */ + + if (consp(left) && first(left) == or_s && + consp(right) && first(right) == or_s) + { + val exlist = flatten_or(or_expr); + val repeats_removed = mapcon(func_n1(unique_first), exlist); + return unflatten_or(repeats_removed); + } else { + return or_expr; + } +} + static val reg_derivative(val, val); static val reg_derivative_list(val exp_list, val ch) @@ -1335,7 +1388,7 @@ static val reg_derivative(val exp, val ch) if (d_arg2 == t) return d_arg1; - return cons(or_s, cons(d_arg1, cons(d_arg2, nil))); + return reduce_or(cons(or_s, cons(d_arg1, cons(d_arg2, nil)))); } else if (sym == and_s) { val d_arg1 = reg_derivative(first(args), ch); val d_arg2 = nil; |